blob: ea1069df336b038eaea5f92896403c0cfcb4a291 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Eric Smitha9f7d622008-02-17 19:46:49 +00007#include "formatter_string.h"
8
Guido van Rossum013142a1994-08-30 08:19:36 +00009#include <ctype.h>
10
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000011#ifdef COUNT_ALLOCS
12int null_strings, one_strings;
13#endif
14
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
Tim Petersae1d0c92006-03-17 03:29:34 +000023 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000024 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000028/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000029 For both PyString_FromString() and PyString_FromStringAndSize(), the
30 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000034 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000035
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000036 For PyString_FromStringAndSize(), the parameter `str' is
37 either NULL or else points to a string containing at least `size' bytes.
38 For PyString_FromStringAndSize(), the string in the `str' parameter does
39 not have to be null-terminated. (Therefore it is safe to construct a
40 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
41 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
42 bytes (setting the last byte to the null terminating character) and you can
43 fill in the data yourself. If `str' is non-NULL then the resulting
44 PyString object must be treated as immutable and you must not fill in nor
45 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000046
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000047 The PyObject member `op->ob_size', which denotes the number of "extra
48 items" in a variable-size object, will contain the number of bytes
49 allocated for string data, not counting the null terminating character. It
50 is therefore equal to the `size' parameter (for
51 PyString_FromStringAndSize()) or the length of the string in the `str'
52 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000053*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000055PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Tim Peters9e897f42001-05-09 07:37:07 +000057 register PyStringObject *op;
Gregory P. Smithc00eb732008-04-09 23:16:37 +000058 if (size < 0) {
59 PyErr_SetString(PyExc_SystemError,
60 "Negative size passed to PyString_FromStringAndSize");
61 return NULL;
62 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 if (size == 0 && (op = nullstring) != NULL) {
64#ifdef COUNT_ALLOCS
65 null_strings++;
66#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 Py_INCREF(op);
68 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 if (size == 1 && str != NULL &&
71 (op = characters[*str & UCHAR_MAX]) != NULL)
72 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073#ifdef COUNT_ALLOCS
74 one_strings++;
75#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 Py_INCREF(op);
77 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000079
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000080 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000081 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000084 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000086 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000088 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000089 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000090 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105}
106
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000108PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109{
Tim Peters62de65b2001-12-06 20:29:32 +0000110 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000111 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000112
113 assert(str != NULL);
114 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000115 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000116 PyErr_SetString(PyExc_OverflowError,
117 "string is too long for a Python string");
118 return NULL;
119 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 if (size == 0 && (op = nullstring) != NULL) {
121#ifdef COUNT_ALLOCS
122 null_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
127 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
128#ifdef COUNT_ALLOCS
129 one_strings++;
130#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000135 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000136 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000137 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000139 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000141 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000142 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000143 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000157 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000158}
159
Barry Warsawdadace02001-08-24 18:32:06 +0000160PyObject *
161PyString_FromFormatV(const char *format, va_list vargs)
162{
Tim Petersc15c4f12001-10-02 21:32:07 +0000163 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000164 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000165 const char* f;
166 char *s;
167 PyObject* string;
168
Tim Petersc15c4f12001-10-02 21:32:07 +0000169#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000170 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000171#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#ifdef __va_copy
173 __va_copy(count, vargs);
174#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000175 count = vargs;
176#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000177#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000178 /* step 1: figure out how large a buffer we need */
179 for (f = format; *f; f++) {
180 if (*f == '%') {
181 const char* p = f;
182 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
183 ;
184
Tim Peters8931ff12006-05-13 23:28:20 +0000185 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
186 * they don't affect the amount of space we reserve.
187 */
188 if ((*f == 'l' || *f == 'z') &&
189 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000190 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000191
Barry Warsawdadace02001-08-24 18:32:06 +0000192 switch (*f) {
193 case 'c':
194 (void)va_arg(count, int);
195 /* fall through... */
196 case '%':
197 n++;
198 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000199 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000200 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000201 /* 20 bytes is enough to hold a 64-bit
202 integer. Decimal takes the most space.
203 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000204 n += 20;
205 break;
206 case 's':
207 s = va_arg(count, char*);
208 n += strlen(s);
209 break;
210 case 'p':
211 (void) va_arg(count, int);
212 /* maximum 64-bit pointer representation:
213 * 0xffffffffffffffff
214 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000215 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000216 */
217 n += 19;
218 break;
219 default:
220 /* if we stumble upon an unknown
221 formatting code, copy the rest of
222 the format string to the output
223 string. (we cannot just skip the
224 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000225 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000226 n += strlen(p);
227 goto expand;
228 }
229 } else
230 n++;
231 }
232 expand:
233 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000234 /* Since we've analyzed how much space we need for the worst case,
235 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000236 string = PyString_FromStringAndSize(NULL, n);
237 if (!string)
238 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000239
Barry Warsawdadace02001-08-24 18:32:06 +0000240 s = PyString_AsString(string);
241
242 for (f = format; *f; f++) {
243 if (*f == '%') {
244 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000245 Py_ssize_t i;
246 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000247 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000248 /* parse the width.precision part (we're only
249 interested in the precision value, if any) */
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 if (*f == '.') {
254 f++;
255 n = 0;
256 while (isdigit(Py_CHARMASK(*f)))
257 n = (n*10) + *f++ - '0';
258 }
259 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
260 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000261 /* handle the long flag, but only for %ld and %lu.
262 others can be added when necessary. */
263 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000264 longflag = 1;
265 ++f;
266 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000267 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000268 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000269 size_tflag = 1;
270 ++f;
271 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000272
Barry Warsawdadace02001-08-24 18:32:06 +0000273 switch (*f) {
274 case 'c':
275 *s++ = va_arg(vargs, int);
276 break;
277 case 'd':
278 if (longflag)
279 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000280 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000281 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
282 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000283 else
284 sprintf(s, "%d", va_arg(vargs, int));
285 s += strlen(s);
286 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000287 case 'u':
288 if (longflag)
289 sprintf(s, "%lu",
290 va_arg(vargs, unsigned long));
291 else if (size_tflag)
292 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
293 va_arg(vargs, size_t));
294 else
295 sprintf(s, "%u",
296 va_arg(vargs, unsigned int));
297 s += strlen(s);
298 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000299 case 'i':
300 sprintf(s, "%i", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'x':
304 sprintf(s, "%x", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 's':
308 p = va_arg(vargs, char*);
309 i = strlen(p);
310 if (n > 0 && i > n)
311 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000312 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000313 s += i;
314 break;
315 case 'p':
316 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000317 /* %p is ill-defined: ensure leading 0x. */
318 if (s[1] == 'X')
319 s[1] = 'x';
320 else if (s[1] != 'x') {
321 memmove(s+2, s, strlen(s)+1);
322 s[0] = '0';
323 s[1] = 'x';
324 }
Barry Warsawdadace02001-08-24 18:32:06 +0000325 s += strlen(s);
326 break;
327 case '%':
328 *s++ = '%';
329 break;
330 default:
331 strcpy(s, p);
332 s += strlen(s);
333 goto end;
334 }
335 } else
336 *s++ = *f;
337 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000340 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000341 return string;
342}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000343
Barry Warsawdadace02001-08-24 18:32:06 +0000344PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000345PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000346{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000347 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000348 va_list vargs;
349
350#ifdef HAVE_STDARG_PROTOTYPES
351 va_start(vargs, format);
352#else
353 va_start(vargs);
354#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000355 ret = PyString_FromFormatV(format, vargs);
356 va_end(vargs);
357 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000358}
359
360
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000362 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000363 const char *encoding,
364 const char *errors)
365{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000366 PyObject *v, *str;
367
368 str = PyString_FromStringAndSize(s, size);
369 if (str == NULL)
370 return NULL;
371 v = PyString_AsDecodedString(str, encoding, errors);
372 Py_DECREF(str);
373 return v;
374}
375
376PyObject *PyString_AsDecodedObject(PyObject *str,
377 const char *encoding,
378 const char *errors)
379{
380 PyObject *v;
381
382 if (!PyString_Check(str)) {
383 PyErr_BadArgument();
384 goto onError;
385 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000386
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387 if (encoding == NULL) {
388#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000390#else
391 PyErr_SetString(PyExc_ValueError, "no encoding specified");
392 goto onError;
393#endif
394 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395
396 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 v = PyCodec_Decode(str, encoding, errors);
398 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000400
401 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000404 return NULL;
405}
406
407PyObject *PyString_AsDecodedString(PyObject *str,
408 const char *encoding,
409 const char *errors)
410{
411 PyObject *v;
412
413 v = PyString_AsDecodedObject(str, encoding, errors);
414 if (v == NULL)
415 goto onError;
416
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000417#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 /* Convert Unicode to a string using the default encoding */
419 if (PyUnicode_Check(v)) {
420 PyObject *temp = v;
421 v = PyUnicode_AsEncodedString(v, NULL, NULL);
422 Py_DECREF(temp);
423 if (v == NULL)
424 goto onError;
425 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000426#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427 if (!PyString_Check(v)) {
428 PyErr_Format(PyExc_TypeError,
429 "decoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000430 Py_TYPE(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000431 Py_DECREF(v);
432 goto onError;
433 }
434
435 return v;
436
437 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 return NULL;
439}
440
441PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000442 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000447
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000448 str = PyString_FromStringAndSize(s, size);
449 if (str == NULL)
450 return NULL;
451 v = PyString_AsEncodedString(str, encoding, errors);
452 Py_DECREF(str);
453 return v;
454}
455
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 const char *encoding,
458 const char *errors)
459{
460 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000461
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(str)) {
463 PyErr_BadArgument();
464 goto onError;
465 }
466
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467 if (encoding == NULL) {
468#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000470#else
471 PyErr_SetString(PyExc_ValueError, "no encoding specified");
472 goto onError;
473#endif
474 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475
476 /* Encode via the codec registry */
477 v = PyCodec_Encode(str, encoding, errors);
478 if (v == NULL)
479 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000480
481 return v;
482
483 onError:
484 return NULL;
485}
486
487PyObject *PyString_AsEncodedString(PyObject *str,
488 const char *encoding,
489 const char *errors)
490{
491 PyObject *v;
492
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000493 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494 if (v == NULL)
495 goto onError;
496
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000497#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000498 /* Convert Unicode to a string using the default encoding */
499 if (PyUnicode_Check(v)) {
500 PyObject *temp = v;
501 v = PyUnicode_AsEncodedString(v, NULL, NULL);
502 Py_DECREF(temp);
503 if (v == NULL)
504 goto onError;
505 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000506#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000507 if (!PyString_Check(v)) {
508 PyErr_Format(PyExc_TypeError,
509 "encoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000510 Py_TYPE(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 Py_DECREF(v);
512 goto onError;
513 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000514
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000515 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000516
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000517 onError:
518 return NULL;
519}
520
Guido van Rossum234f9421993-06-17 12:35:49 +0000521static void
Fred Drakeba096332000-07-09 07:04:36 +0000522string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000524 switch (PyString_CHECK_INTERNED(op)) {
525 case SSTATE_NOT_INTERNED:
526 break;
527
528 case SSTATE_INTERNED_MORTAL:
529 /* revive dead object temporarily for DelItem */
Christian Heimese93237d2007-12-19 02:37:44 +0000530 Py_REFCNT(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000531 if (PyDict_DelItem(interned, op) != 0)
532 Py_FatalError(
533 "deletion of interned string failed");
534 break;
535
536 case SSTATE_INTERNED_IMMORTAL:
537 Py_FatalError("Immortal interned string died.");
538
539 default:
540 Py_FatalError("Inconsistent interned string state.");
541 }
Christian Heimese93237d2007-12-19 02:37:44 +0000542 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000543}
544
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545/* Unescape a backslash-escaped string. If unicode is non-zero,
546 the string is a u-literal. If recode_encoding is non-zero,
547 the string is UTF-8 encoded and should be re-encoded in the
548 specified encoding. */
549
550PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000551 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000552 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 const char *recode_encoding)
555{
556 int c;
557 char *p, *buf;
558 const char *end;
559 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000560 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000561 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000562 if (v == NULL)
563 return NULL;
564 p = buf = PyString_AsString(v);
565 end = s + len;
566 while (s < end) {
567 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000568 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569#ifdef Py_USING_UNICODE
570 if (recode_encoding && (*s & 0x80)) {
571 PyObject *u, *w;
572 char *r;
573 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000574 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575 t = s;
576 /* Decode non-ASCII bytes as UTF-8. */
577 while (t < end && (*t & 0x80)) t++;
578 u = PyUnicode_DecodeUTF8(s, t - s, errors);
579 if(!u) goto failed;
580
581 /* Recode them in target encoding. */
582 w = PyUnicode_AsEncodedString(
583 u, recode_encoding, errors);
584 Py_DECREF(u);
585 if (!w) goto failed;
586
587 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000588 assert(PyString_Check(w));
589 r = PyString_AS_STRING(w);
590 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000591 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000592 p += rn;
593 Py_DECREF(w);
594 s = t;
595 } else {
596 *p++ = *s++;
597 }
598#else
599 *p++ = *s++;
600#endif
601 continue;
602 }
603 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000604 if (s==end) {
605 PyErr_SetString(PyExc_ValueError,
606 "Trailing \\ in string");
607 goto failed;
608 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000609 switch (*s++) {
610 /* XXX This assumes ASCII! */
611 case '\n': break;
612 case '\\': *p++ = '\\'; break;
613 case '\'': *p++ = '\''; break;
614 case '\"': *p++ = '\"'; break;
615 case 'b': *p++ = '\b'; break;
616 case 'f': *p++ = '\014'; break; /* FF */
617 case 't': *p++ = '\t'; break;
618 case 'n': *p++ = '\n'; break;
619 case 'r': *p++ = '\r'; break;
620 case 'v': *p++ = '\013'; break; /* VT */
621 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
622 case '0': case '1': case '2': case '3':
623 case '4': case '5': case '6': case '7':
624 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000625 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000627 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 c = (c<<3) + *s++ - '0';
629 }
630 *p++ = c;
631 break;
632 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000633 if (s+1 < end &&
634 isxdigit(Py_CHARMASK(s[0])) &&
635 isxdigit(Py_CHARMASK(s[1])))
636 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000637 unsigned int x = 0;
638 c = Py_CHARMASK(*s);
639 s++;
640 if (isdigit(c))
641 x = c - '0';
642 else if (islower(c))
643 x = 10 + c - 'a';
644 else
645 x = 10 + c - 'A';
646 x = x << 4;
647 c = Py_CHARMASK(*s);
648 s++;
649 if (isdigit(c))
650 x += c - '0';
651 else if (islower(c))
652 x += 10 + c - 'a';
653 else
654 x += 10 + c - 'A';
655 *p++ = x;
656 break;
657 }
658 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000659 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000660 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000661 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 }
663 if (strcmp(errors, "replace") == 0) {
664 *p++ = '?';
665 } else if (strcmp(errors, "ignore") == 0)
666 /* do nothing */;
667 else {
668 PyErr_Format(PyExc_ValueError,
669 "decoding error; "
670 "unknown error handling code: %.400s",
671 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000672 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 }
674#ifndef Py_USING_UNICODE
675 case 'u':
676 case 'U':
677 case 'N':
678 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000679 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000680 "Unicode escapes not legal "
681 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000682 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684#endif
685 default:
686 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000687 s--;
688 goto non_esc; /* an arbitry number of unescaped
689 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000690 }
691 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000692 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 return v;
695 failed:
696 Py_DECREF(v);
697 return NULL;
698}
699
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000700/* -------------------------------------------------------------------- */
701/* object api */
702
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704string_getsize(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return -1;
710 return len;
711}
712
713static /*const*/ char *
714string_getbuffer(register PyObject *op)
715{
716 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (PyString_AsStringAndSize(op, &s, &len))
719 return NULL;
720 return s;
721}
722
Martin v. Löwis18e16552006-02-15 17:27:45 +0000723Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getsize(op);
Christian Heimese93237d2007-12-19 02:37:44 +0000728 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
731/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000732PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000733{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000734 if (!PyString_Check(op))
735 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737}
738
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000739int
740PyString_AsStringAndSize(register PyObject *obj,
741 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000742 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743{
744 if (s == NULL) {
745 PyErr_BadInternalCall();
746 return -1;
747 }
748
749 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000750#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 if (PyUnicode_Check(obj)) {
752 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
753 if (obj == NULL)
754 return -1;
755 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000756 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000757#endif
758 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000759 PyErr_Format(PyExc_TypeError,
760 "expected string or Unicode object, "
Christian Heimese93237d2007-12-19 02:37:44 +0000761 "%.200s found", Py_TYPE(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 return -1;
763 }
764 }
765
766 *s = PyString_AS_STRING(obj);
767 if (len != NULL)
768 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000769 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000770 PyErr_SetString(PyExc_TypeError,
771 "expected string without null bytes");
772 return -1;
773 }
774 return 0;
775}
776
Fredrik Lundhaf722372006-05-25 17:55:31 +0000777/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000778/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000779
Eric Smitha9f7d622008-02-17 19:46:49 +0000780#include "stringlib/stringdefs.h"
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Eric Smithcf537ff2008-05-11 19:52:48 +0000787#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
788#include "stringlib/localeutil.h"
789
790
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000792static int
Fred Drakeba096332000-07-09 07:04:36 +0000793string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794{
Brett Cannon01531592007-09-17 03:28:34 +0000795 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000796 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000797 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000798
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000800 if (! PyString_CheckExact(op)) {
801 int ret;
802 /* A str subclass may have its own __str__ method. */
803 op = (PyStringObject *) PyObject_Str((PyObject *)op);
804 if (op == NULL)
805 return -1;
806 ret = string_print(op, fp, flags);
807 Py_DECREF(op);
808 return ret;
809 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000810 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000811 char *data = op->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +0000812 Py_ssize_t size = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000813 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000814 while (size > INT_MAX) {
815 /* Very long strings cannot be written atomically.
816 * But don't write exactly INT_MAX bytes at a time
817 * to avoid memory aligment issues.
818 */
819 const int chunk_size = INT_MAX & ~0x3FFF;
820 fwrite(data, 1, chunk_size, fp);
821 data += chunk_size;
822 size -= chunk_size;
823 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000825 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000826#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000827 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000828#endif
Brett Cannon01531592007-09-17 03:28:34 +0000829 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000830 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832
Thomas Wouters7e474022000-07-16 12:04:32 +0000833 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000834 quote = '\'';
Christian Heimese93237d2007-12-19 02:37:44 +0000835 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
836 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 quote = '"';
838
Christian Heimese93237d2007-12-19 02:37:44 +0000839 str_len = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000840 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000841 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000842 for (i = 0; i < str_len; i++) {
843 /* Since strings are immutable and the caller should have a
844 reference, accessing the interal buffer should not be an issue
845 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000846 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000847 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000851 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000852 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000853 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000854 fprintf(fp, "\\r");
855 else if (c < ' ' || c >= 0x7f)
856 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000857 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000858 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000859 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000861 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000862 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863}
864
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000865PyObject *
866PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000868 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimese93237d2007-12-19 02:37:44 +0000869 size_t newsize = 2 + 4 * Py_SIZE(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000870 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +0000871 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000872 PyErr_SetString(PyExc_OverflowError,
873 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000874 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000875 }
876 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000878 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879 }
880 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000881 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882 register char c;
883 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000884 int quote;
885
Thomas Wouters7e474022000-07-16 12:04:32 +0000886 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000888 if (smartquotes &&
Christian Heimese93237d2007-12-19 02:37:44 +0000889 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
890 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000891 quote = '"';
892
Tim Peters9161c8b2001-12-03 01:55:38 +0000893 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000894 *p++ = quote;
Christian Heimese93237d2007-12-19 02:37:44 +0000895 for (i = 0; i < Py_SIZE(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000896 /* There's at least enough room for a hex escape
897 and a closing quote. */
898 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000900 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000902 else if (c == '\t')
903 *p++ = '\\', *p++ = 't';
904 else if (c == '\n')
905 *p++ = '\\', *p++ = 'n';
906 else if (c == '\r')
907 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000908 else if (c < ' ' || c >= 0x7f) {
909 /* For performance, we don't want to call
910 PyOS_snprintf here (extra layers of
911 function call). */
912 sprintf(p, "\\x%02x", c & 0xff);
913 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000914 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000915 else
916 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000918 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000919 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000922 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000923 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000924 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925}
926
Guido van Rossum189f1df2001-05-01 16:51:53 +0000927static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000928string_repr(PyObject *op)
929{
930 return PyString_Repr(op, 1);
931}
932
933static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000934string_str(PyObject *s)
935{
Tim Petersc9933152001-10-16 20:18:24 +0000936 assert(PyString_Check(s));
937 if (PyString_CheckExact(s)) {
938 Py_INCREF(s);
939 return s;
940 }
941 else {
942 /* Subtype -- return genuine string with the same value. */
943 PyStringObject *t = (PyStringObject *) s;
Christian Heimese93237d2007-12-19 02:37:44 +0000944 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Tim Petersc9933152001-10-16 20:18:24 +0000945 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000946}
947
Martin v. Löwis18e16552006-02-15 17:27:45 +0000948static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000949string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950{
Christian Heimese93237d2007-12-19 02:37:44 +0000951 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000952}
953
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000955string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000956{
Andrew Dalke598710c2006-05-25 18:18:39 +0000957 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 register PyStringObject *op;
959 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000960#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000961 if (PyUnicode_Check(bb))
962 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000963#endif
Christian Heimes3497f942008-05-26 12:29:14 +0000964 if (PyByteArray_Check(bb))
965 return PyByteArray_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000966 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000967 "cannot concatenate 'str' and '%.200s' objects",
Christian Heimese93237d2007-12-19 02:37:44 +0000968 Py_TYPE(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000969 return NULL;
970 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000972 /* Optimize cases with empty left or right operand */
Christian Heimese93237d2007-12-19 02:37:44 +0000973 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000974 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimese93237d2007-12-19 02:37:44 +0000975 if (Py_SIZE(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000976 Py_INCREF(bb);
977 return bb;
978 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000979 Py_INCREF(a);
980 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981 }
Christian Heimese93237d2007-12-19 02:37:44 +0000982 size = Py_SIZE(a) + Py_SIZE(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000983 if (size < 0) {
984 PyErr_SetString(PyExc_OverflowError,
985 "strings are too large to concat");
986 return NULL;
987 }
988
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000989 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000990 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000991 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000993 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000994 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000995 op->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimese93237d2007-12-19 02:37:44 +0000996 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
997 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000998 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000#undef b
1001}
1002
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001003static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001004string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001005{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001006 register Py_ssize_t i;
1007 register Py_ssize_t j;
1008 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001009 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001010 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001011 if (n < 0)
1012 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001013 /* watch out for overflows: the size can overflow int,
1014 * and the # of bytes needed can overflow size_t
1015 */
Christian Heimese93237d2007-12-19 02:37:44 +00001016 size = Py_SIZE(a) * n;
1017 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001018 PyErr_SetString(PyExc_OverflowError,
1019 "repeated string is too long");
1020 return NULL;
1021 }
Christian Heimese93237d2007-12-19 02:37:44 +00001022 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023 Py_INCREF(a);
1024 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025 }
Tim Peterse7c05322004-06-27 17:24:49 +00001026 nbytes = (size_t)size;
1027 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001028 PyErr_SetString(PyExc_OverflowError,
1029 "repeated string is too long");
1030 return NULL;
1031 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001032 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001033 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001034 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001036 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001037 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001038 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001039 op->ob_sval[size] = '\0';
Christian Heimese93237d2007-12-19 02:37:44 +00001040 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001041 memset(op->ob_sval, a->ob_sval[0] , n);
1042 return (PyObject *) op;
1043 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 i = 0;
1045 if (i < size) {
Christian Heimese93237d2007-12-19 02:37:44 +00001046 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1047 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001048 }
1049 while (i < size) {
1050 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001051 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001052 i += j;
1053 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001054 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001055}
1056
1057/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1058
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001059static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001060string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001061 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001062 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063{
1064 if (i < 0)
1065 i = 0;
1066 if (j < 0)
1067 j = 0; /* Avoid signed/unsigned bug in next line */
Christian Heimese93237d2007-12-19 02:37:44 +00001068 if (j > Py_SIZE(a))
1069 j = Py_SIZE(a);
1070 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001071 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001072 Py_INCREF(a);
1073 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074 }
1075 if (j < i)
1076 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001077 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078}
1079
Guido van Rossum9284a572000-03-07 15:53:43 +00001080static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001082{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001083 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001084#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001085 if (PyUnicode_Check(sub_obj))
1086 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001087#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001088 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001089 PyErr_Format(PyExc_TypeError,
1090 "'in <string>' requires string as left operand, "
Christian Heimese93237d2007-12-19 02:37:44 +00001091 "not %.200s", Py_TYPE(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001092 return -1;
1093 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001094 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001095
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001096 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001097}
1098
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001100string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001101{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001102 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001103 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +00001104 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001105 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001106 return NULL;
1107 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001108 pchar = a->ob_sval[i];
1109 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001110 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001111 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001112 else {
1113#ifdef COUNT_ALLOCS
1114 one_strings++;
1115#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001116 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001117 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001118 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119}
1120
Martin v. Löwiscd353062001-05-24 16:56:35 +00001121static PyObject*
1122string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001123{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001124 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001125 Py_ssize_t len_a, len_b;
1126 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 PyObject *result;
1128
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001129 /* Make sure both arguments are strings. */
1130 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001131 result = Py_NotImplemented;
1132 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001133 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001134 if (a == b) {
1135 switch (op) {
1136 case Py_EQ:case Py_LE:case Py_GE:
1137 result = Py_True;
1138 goto out;
1139 case Py_NE:case Py_LT:case Py_GT:
1140 result = Py_False;
1141 goto out;
1142 }
1143 }
1144 if (op == Py_EQ) {
1145 /* Supporting Py_NE here as well does not save
1146 much time, since Py_NE is rarely used. */
Christian Heimese93237d2007-12-19 02:37:44 +00001147 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001148 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimese93237d2007-12-19 02:37:44 +00001149 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001150 result = Py_True;
1151 } else {
1152 result = Py_False;
1153 }
1154 goto out;
1155 }
Christian Heimese93237d2007-12-19 02:37:44 +00001156 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001157 min_len = (len_a < len_b) ? len_a : len_b;
1158 if (min_len > 0) {
1159 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1160 if (c==0)
1161 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001162 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001163 c = 0;
1164 if (c == 0)
1165 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1166 switch (op) {
1167 case Py_LT: c = c < 0; break;
1168 case Py_LE: c = c <= 0; break;
1169 case Py_EQ: assert(0); break; /* unreachable */
1170 case Py_NE: c = c != 0; break;
1171 case Py_GT: c = c > 0; break;
1172 case Py_GE: c = c >= 0; break;
1173 default:
1174 result = Py_NotImplemented;
1175 goto out;
1176 }
1177 result = c ? Py_True : Py_False;
1178 out:
1179 Py_INCREF(result);
1180 return result;
1181}
1182
1183int
1184_PyString_Eq(PyObject *o1, PyObject *o2)
1185{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001186 PyStringObject *a = (PyStringObject*) o1;
1187 PyStringObject *b = (PyStringObject*) o2;
Christian Heimese93237d2007-12-19 02:37:44 +00001188 return Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001189 && *a->ob_sval == *b->ob_sval
Christian Heimese93237d2007-12-19 02:37:44 +00001190 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001191}
1192
Guido van Rossum9bfef441993-03-29 10:43:31 +00001193static long
Fred Drakeba096332000-07-09 07:04:36 +00001194string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001195{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001196 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001197 register unsigned char *p;
1198 register long x;
1199
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001200 if (a->ob_shash != -1)
1201 return a->ob_shash;
Christian Heimese93237d2007-12-19 02:37:44 +00001202 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001203 p = (unsigned char *) a->ob_sval;
1204 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001205 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001206 x = (1000003*x) ^ *p++;
Christian Heimese93237d2007-12-19 02:37:44 +00001207 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001208 if (x == -1)
1209 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001210 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001211 return x;
1212}
1213
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214static PyObject*
1215string_subscript(PyStringObject* self, PyObject* item)
1216{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001217 if (PyIndex_Check(item)) {
1218 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 if (i == -1 && PyErr_Occurred())
1220 return NULL;
1221 if (i < 0)
1222 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001223 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224 }
1225 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001226 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001227 char* source_buf;
1228 char* result_buf;
1229 PyObject* result;
1230
Tim Petersae1d0c92006-03-17 03:29:34 +00001231 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232 PyString_GET_SIZE(self),
1233 &start, &stop, &step, &slicelength) < 0) {
1234 return NULL;
1235 }
1236
1237 if (slicelength <= 0) {
1238 return PyString_FromStringAndSize("", 0);
1239 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001240 else if (start == 0 && step == 1 &&
1241 slicelength == PyString_GET_SIZE(self) &&
1242 PyString_CheckExact(self)) {
1243 Py_INCREF(self);
1244 return (PyObject *)self;
1245 }
1246 else if (step == 1) {
1247 return PyString_FromStringAndSize(
1248 PyString_AS_STRING(self) + start,
1249 slicelength);
1250 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001251 else {
1252 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001253 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001254 if (result_buf == NULL)
1255 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001256
Tim Petersae1d0c92006-03-17 03:29:34 +00001257 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001258 cur += step, i++) {
1259 result_buf[i] = source_buf[cur];
1260 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001261
1262 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001263 slicelength);
1264 PyMem_Free(result_buf);
1265 return result;
1266 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001267 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001268 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001269 PyErr_Format(PyExc_TypeError,
1270 "string indices must be integers, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00001271 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001272 return NULL;
1273 }
1274}
1275
Martin v. Löwis18e16552006-02-15 17:27:45 +00001276static Py_ssize_t
1277string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278{
1279 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001280 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001281 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282 return -1;
1283 }
1284 *ptr = (void *)self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001285 return Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286}
1287
Martin v. Löwis18e16552006-02-15 17:27:45 +00001288static Py_ssize_t
1289string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001290{
Guido van Rossum045e6881997-09-08 18:30:11 +00001291 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001292 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001293 return -1;
1294}
1295
Martin v. Löwis18e16552006-02-15 17:27:45 +00001296static Py_ssize_t
1297string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001298{
1299 if ( lenp )
Christian Heimese93237d2007-12-19 02:37:44 +00001300 *lenp = Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001301 return 1;
1302}
1303
Martin v. Löwis18e16552006-02-15 17:27:45 +00001304static Py_ssize_t
1305string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001306{
1307 if ( index != 0 ) {
1308 PyErr_SetString(PyExc_SystemError,
1309 "accessing non-existent string segment");
1310 return -1;
1311 }
1312 *ptr = self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001313 return Py_SIZE(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001314}
1315
Christian Heimes1a6387e2008-03-26 12:49:49 +00001316static int
1317string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1318{
1319 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1320 0, flags);
1321}
1322
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001323static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001325 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001326 (ssizeargfunc)string_repeat, /*sq_repeat*/
1327 (ssizeargfunc)string_item, /*sq_item*/
1328 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001329 0, /*sq_ass_item*/
1330 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001331 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001332};
1333
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001334static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001335 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001336 (binaryfunc)string_subscript,
1337 0,
1338};
1339
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001340static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001341 (readbufferproc)string_buffer_getreadbuf,
1342 (writebufferproc)string_buffer_getwritebuf,
1343 (segcountproc)string_buffer_getsegcount,
1344 (charbufferproc)string_buffer_getcharbuf,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001345 (getbufferproc)string_buffer_getbuffer,
1346 0, /* XXX */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001347};
1348
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349
1350
/* Strip modes; each value is also an index into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-format strings, indexed by strip mode. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode `i`: the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001359
Andrew Dalke525eab32006-05-26 14:00:45 +00001360
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared before calling memcmp() on the interior.

   Don't call if length < 2: the interior length (length-2) would be
   negative.

   Every parameter is parenthesized so that expression arguments (for
   example `buf + 1` as target) expand correctly.  Arguments are
   evaluated more than once, so they must not have side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                  \
    ((target)[(offset)] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1366
1367
Andrew Dalke525eab32006-05-26 14:00:45 +00001368/* Overallocate the initial list to reduce the number of reallocs for small
1369 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1370 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1371 text (roughly 11 words per line) and field delimited data (usually 1-10
1372 fields). For large strings the split algorithms are bandwidth limited
1373 so increasing the preallocation likely will not improve things.*/
1374
/* Upper bound on the number of list slots preallocated for a split. */
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split with at most
   `maxsplit` splits: maxsplit+1, capped at MAX_PREALLOC.
   5 splits gives 6 elements.

   `maxsplit` is parenthesized so that expression arguments with
   low-precedence operators expand correctly; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1380
/* Append the substring data[left:right] to `list`.

   Relies on names from the expansion site: `str` (scratch PyObject *),
   `list` (the result list) and an `onError` label, which is jumped to
   when creating the substring or appending it fails.  The local
   reference to the substring is dropped on both paths.

   This is a bare statement sequence ending in an `else` branch, not a
   single statement: use it only as a full statement of its own. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
1392
/* Add the substring data[left:right] as the next element of `list` and
   increment `count`.

   Relies on names from the expansion site: `str`, `list`, `count` and
   an `onError` label.  While `count` is below MAX_PREALLOC the new
   string is stored directly with PyList_SET_ITEM (the reference is
   handed to the list); beyond that it is appended with PyList_Append
   and the local reference is dropped.  Jumps to `onError` on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001409
/* Always force the list to the expected size: overwrite the list's size
   field with `count`, the enclosing function's element counter.
   Parenthesized so the assignment is a self-contained expression. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)

/* Whitespace scanners.  `i` must be a modifiable index variable; it is
   advanced (SKIP_*) or moved back (RSKIP_*) in place.  The forward forms
   stop at `len`, the backward forms stop once `i` drops below 0.
   Arguments are parenthesized so expression arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1417
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001418Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001419split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420{
Skip Montanaro26015492007-12-08 15:33:24 +00001421 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001422 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001424 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425
1426 if (list == NULL)
1427 return NULL;
1428
Andrew Dalke02758d62006-05-26 15:21:01 +00001429 i = j = 0;
1430
1431 while (maxsplit-- > 0) {
1432 SKIP_SPACE(s, i, len);
1433 if (i==len) break;
1434 j = i; i++;
1435 SKIP_NONSPACE(s, i, len);
Skip Montanaro26015492007-12-08 15:33:24 +00001436 if (j == 0 && i == len && PyString_CheckExact(self)) {
1437 /* No whitespace in self, so just use it as list[0] */
1438 Py_INCREF(self);
1439 PyList_SET_ITEM(list, 0, (PyObject *)self);
1440 count++;
1441 break;
1442 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001443 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001445
1446 if (i < len) {
1447 /* Only occurs when maxsplit was reached */
1448 /* Skip any remaining whitespace and copy to end of string */
1449 SKIP_SPACE(s, i, len);
1450 if (i != len)
1451 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001452 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001453 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001455 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 Py_DECREF(list);
1457 return NULL;
1458}
1459
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001460Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001461split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462{
Skip Montanaro26015492007-12-08 15:33:24 +00001463 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001464 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001465 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001466 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001467
1468 if (list == NULL)
1469 return NULL;
1470
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001471 i = j = 0;
1472 while ((j < len) && (maxcount-- > 0)) {
1473 for(; j<len; j++) {
1474 /* I found that using memchr makes no difference */
1475 if (s[j] == ch) {
1476 SPLIT_ADD(s, i, j);
1477 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001478 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001479 }
1480 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001481 }
Skip Montanaro26015492007-12-08 15:33:24 +00001482 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1483 /* ch not in self, so just use self as list[0] */
1484 Py_INCREF(self);
1485 PyList_SET_ITEM(list, 0, (PyObject *)self);
1486 count++;
1487 }
1488 else if (i <= len) {
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001489 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001490 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001491 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492 return list;
1493
1494 onError:
1495 Py_DECREF(list);
1496 return NULL;
1497}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001499PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500"S.split([sep [,maxsplit]]) -> list of strings\n\
1501\n\
1502Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001503delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001504splits are done. If sep is not specified or is None, any\n\
Georg Brandlecbbd942008-05-11 20:53:55 +00001505whitespace string is a separator and empty strings are removed\n\
1506from the result.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507
1508static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001509string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001511 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001512 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001513 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001514 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001515#ifdef USE_FAST
1516 Py_ssize_t pos;
1517#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518
Martin v. Löwis9c830762006-04-13 08:37:17 +00001519 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001521 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001522 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001523 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001524 return split_whitespace(self, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001525 if (PyString_Check(subobj)) {
1526 sub = PyString_AS_STRING(subobj);
1527 n = PyString_GET_SIZE(subobj);
1528 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001529#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001530 else if (PyUnicode_Check(subobj))
1531 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001532#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001533 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1534 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001535
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536 if (n == 0) {
1537 PyErr_SetString(PyExc_ValueError, "empty separator");
1538 return NULL;
1539 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001540 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001541 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542
Andrew Dalke525eab32006-05-26 14:00:45 +00001543 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001544 if (list == NULL)
1545 return NULL;
1546
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001547#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001549 while (maxsplit-- > 0) {
1550 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1551 if (pos < 0)
1552 break;
1553 j = i+pos;
1554 SPLIT_ADD(s, i, j);
1555 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001557#else
1558 i = j = 0;
1559 while ((j+n <= len) && (maxsplit-- > 0)) {
1560 for (; j+n <= len; j++) {
1561 if (Py_STRING_MATCH(s, j, sub, n)) {
1562 SPLIT_ADD(s, i, j);
1563 i = j = j + n;
1564 break;
1565 }
1566 }
1567 }
1568#endif
1569 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001570 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 return list;
1572
Andrew Dalke525eab32006-05-26 14:00:45 +00001573 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574 Py_DECREF(list);
1575 return NULL;
1576}
1577
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001578PyDoc_STRVAR(partition__doc__,
1579"S.partition(sep) -> (head, sep, tail)\n\
1580\n\
1581Searches for the separator sep in S, and returns the part before it,\n\
1582the separator itself, and the part after it. If the separator is not\n\
1583found, returns S and two empty strings.");
1584
1585static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001586string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001587{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001588 const char *sep;
1589 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001590
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001591 if (PyString_Check(sep_obj)) {
1592 sep = PyString_AS_STRING(sep_obj);
1593 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001594 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001595#ifdef Py_USING_UNICODE
1596 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001597 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001598#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001599 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001600 return NULL;
1601
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001602 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001603 (PyObject*) self,
1604 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1605 sep_obj, sep, sep_len
1606 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001607}
1608
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001609PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001610"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001611\n\
1612Searches for the separator sep in S, starting at the end of S, and returns\n\
1613the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001614separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001615
1616static PyObject *
1617string_rpartition(PyStringObject *self, PyObject *sep_obj)
1618{
1619 const char *sep;
1620 Py_ssize_t sep_len;
1621
1622 if (PyString_Check(sep_obj)) {
1623 sep = PyString_AS_STRING(sep_obj);
1624 sep_len = PyString_GET_SIZE(sep_obj);
1625 }
1626#ifdef Py_USING_UNICODE
1627 else if (PyUnicode_Check(sep_obj))
1628 return PyUnicode_Partition((PyObject *) self, sep_obj);
1629#endif
1630 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1631 return NULL;
1632
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001633 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001634 (PyObject*) self,
1635 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1636 sep_obj, sep, sep_len
1637 );
1638}
1639
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001640Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001641rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001642{
Skip Montanaro26015492007-12-08 15:33:24 +00001643 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001644 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001645 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001646 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001647
1648 if (list == NULL)
1649 return NULL;
1650
Andrew Dalke02758d62006-05-26 15:21:01 +00001651 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001652
Andrew Dalke02758d62006-05-26 15:21:01 +00001653 while (maxsplit-- > 0) {
1654 RSKIP_SPACE(s, i);
1655 if (i<0) break;
1656 j = i; i--;
1657 RSKIP_NONSPACE(s, i);
Skip Montanaro26015492007-12-08 15:33:24 +00001658 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1659 /* No whitespace in self, so just use it as list[0] */
1660 Py_INCREF(self);
1661 PyList_SET_ITEM(list, 0, (PyObject *)self);
1662 count++;
1663 break;
1664 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001665 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001667 if (i >= 0) {
1668 /* Only occurs when maxsplit was reached */
1669 /* Skip any remaining whitespace and copy to beginning of string */
1670 RSKIP_SPACE(s, i);
1671 if (i >= 0)
1672 SPLIT_ADD(s, 0, i + 1);
1673
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001675 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001676 if (PyList_Reverse(list) < 0)
1677 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001679 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001680 Py_DECREF(list);
1681 return NULL;
1682}
1683
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001684Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001685rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001686{
Skip Montanaro26015492007-12-08 15:33:24 +00001687 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001688 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001689 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001690 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001691
1692 if (list == NULL)
1693 return NULL;
1694
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001695 i = j = len - 1;
1696 while ((i >= 0) && (maxcount-- > 0)) {
1697 for (; i >= 0; i--) {
1698 if (s[i] == ch) {
1699 SPLIT_ADD(s, i + 1, j + 1);
1700 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001701 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001702 }
1703 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001704 }
Skip Montanaro26015492007-12-08 15:33:24 +00001705 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1706 /* ch not in self, so just use self as list[0] */
1707 Py_INCREF(self);
1708 PyList_SET_ITEM(list, 0, (PyObject *)self);
1709 count++;
1710 }
1711 else if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001712 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001713 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001714 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001715 if (PyList_Reverse(list) < 0)
1716 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001717 return list;
1718
1719 onError:
1720 Py_DECREF(list);
1721 return NULL;
1722}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001723
1724PyDoc_STRVAR(rsplit__doc__,
1725"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1726\n\
1727Return a list of the words in the string S, using sep as the\n\
1728delimiter string, starting at the end of the string and working\n\
1729to the front. If maxsplit is given, at most maxsplit splits are\n\
1730done. If sep is not specified or is None, any whitespace string\n\
1731is a separator.");
1732
1733static PyObject *
1734string_rsplit(PyStringObject *self, PyObject *args)
1735{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001736 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001737 Py_ssize_t maxsplit = -1, count=0;
Skip Montanaro26015492007-12-08 15:33:24 +00001738 const char *s, *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001739 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001740
Martin v. Löwis9c830762006-04-13 08:37:17 +00001741 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001742 return NULL;
1743 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001744 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001745 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001746 return rsplit_whitespace(self, len, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001747 if (PyString_Check(subobj)) {
1748 sub = PyString_AS_STRING(subobj);
1749 n = PyString_GET_SIZE(subobj);
1750 }
1751#ifdef Py_USING_UNICODE
1752 else if (PyUnicode_Check(subobj))
1753 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1754#endif
1755 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1756 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001757
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001758 if (n == 0) {
1759 PyErr_SetString(PyExc_ValueError, "empty separator");
1760 return NULL;
1761 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001762 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001763 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001764
Andrew Dalke525eab32006-05-26 14:00:45 +00001765 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001766 if (list == NULL)
1767 return NULL;
1768
1769 j = len;
1770 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001771
Skip Montanaro26015492007-12-08 15:33:24 +00001772 s = PyString_AS_STRING(self);
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001773 while ( (i >= 0) && (maxsplit-- > 0) ) {
1774 for (; i>=0; i--) {
1775 if (Py_STRING_MATCH(s, i, sub, n)) {
1776 SPLIT_ADD(s, i + n, j);
1777 j = i;
1778 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001779 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001780 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001781 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001782 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001783 SPLIT_ADD(s, 0, j);
1784 FIX_PREALLOC_SIZE(list);
1785 if (PyList_Reverse(list) < 0)
1786 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001787 return list;
1788
Andrew Dalke525eab32006-05-26 14:00:45 +00001789onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001790 Py_DECREF(list);
1791 return NULL;
1792}
1793
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796"S.join(sequence) -> string\n\
1797\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001798Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001799sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800
1801static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001802string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803{
1804 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001805 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001808 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001809 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001810 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001811 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812
Tim Peters19fe14e2001-01-19 03:03:47 +00001813 seq = PySequence_Fast(orig, "");
1814 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001815 return NULL;
1816 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001817
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001818 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001819 if (seqlen == 0) {
1820 Py_DECREF(seq);
1821 return PyString_FromString("");
1822 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001824 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001825 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1826 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001827 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001828 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001829 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001831
Raymond Hettinger674f2412004-08-23 23:23:54 +00001832 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001833 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001834 * Do a pre-pass to figure out the total amount of space we'll
1835 * need (sz), see whether any argument is absurd, and defer to
1836 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001837 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001838 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001839 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001840 item = PySequence_Fast_GET_ITEM(seq, i);
1841 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001842#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001843 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001844 /* Defer to Unicode join.
1845 * CAUTION: There's no gurantee that the
1846 * original sequence can be iterated over
1847 * again, so we must pass seq here.
1848 */
1849 PyObject *result;
1850 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001851 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001852 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001853 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001854#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001855 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001856 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001857 " %.80s found",
Christian Heimese93237d2007-12-19 02:37:44 +00001858 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001859 Py_DECREF(seq);
1860 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001861 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001862 sz += PyString_GET_SIZE(item);
1863 if (i != 0)
1864 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001865 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001866 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001867 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001868 Py_DECREF(seq);
1869 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001870 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001871 }
1872
1873 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001875 if (res == NULL) {
1876 Py_DECREF(seq);
1877 return NULL;
1878 }
1879
1880 /* Catenate everything. */
1881 p = PyString_AS_STRING(res);
1882 for (i = 0; i < seqlen; ++i) {
1883 size_t n;
1884 item = PySequence_Fast_GET_ITEM(seq, i);
1885 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001886 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001887 p += n;
1888 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001889 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001890 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001891 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001893
Jeremy Hylton49048292000-07-11 03:28:17 +00001894 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896}
1897
Tim Peters52e155e2001-06-16 05:42:57 +00001898PyObject *
1899_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001900{
Tim Petersa7259592001-06-16 05:11:17 +00001901 assert(sep != NULL && PyString_Check(sep));
1902 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001903 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001904}
1905
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001906Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001908{
1909 if (*end > len)
1910 *end = len;
1911 else if (*end < 0)
1912 *end += len;
1913 if (*end < 0)
1914 *end = 0;
1915 if (*start < 0)
1916 *start += len;
1917 if (*start < 0)
1918 *start = 0;
1919}
1920
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001921Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001922string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001924 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001925 const char *sub;
1926 Py_ssize_t sub_len;
1927 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Facundo Batista57d56692007-11-16 18:04:14 +00001928 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929
Facundo Batista57d56692007-11-16 18:04:14 +00001930 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1931 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001932 return -2;
Facundo Batista57d56692007-11-16 18:04:14 +00001933 /* To support None in "start" and "end" arguments, meaning
1934 the same as if they were not passed.
1935 */
1936 if (obj_start != Py_None)
1937 if (!_PyEval_SliceIndex(obj_start, &start))
1938 return -2;
1939 if (obj_end != Py_None)
1940 if (!_PyEval_SliceIndex(obj_end, &end))
1941 return -2;
1942
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 if (PyString_Check(subobj)) {
1944 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001945 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001946 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001949 return PyUnicode_Find(
1950 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001951#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001952 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001953 /* XXX - the "expected a character buffer object" is pretty
1954 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 return -2;
1956
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001957 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001958 return stringlib_find_slice(
1959 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1960 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001961 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001962 return stringlib_rfind_slice(
1963 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1964 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965}
1966
1967
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001968PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969"S.find(sub [,start [,end]]) -> int\n\
1970\n\
1971Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001972such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973arguments start and end are interpreted as in slice notation.\n\
1974\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001975Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976
1977static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001978string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001980 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981 if (result == -2)
1982 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001983 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984}
1985
1986
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001987PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988"S.index(sub [,start [,end]]) -> int\n\
1989\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001990Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991
1992static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001993string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001995 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996 if (result == -2)
1997 return NULL;
1998 if (result == -1) {
1999 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002000 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001 return NULL;
2002 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002003 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004}
2005
2006
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002007PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008"S.rfind(sub [,start [,end]]) -> int\n\
2009\n\
2010Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00002011such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012arguments start and end are interpreted as in slice notation.\n\
2013\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002014Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015
2016static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002017string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002019 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020 if (result == -2)
2021 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002022 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023}
2024
2025
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002026PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027"S.rindex(sub [,start [,end]]) -> int\n\
2028\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002029Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030
2031static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002032string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002034 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035 if (result == -2)
2036 return NULL;
2037 if (result == -1) {
2038 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002039 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 return NULL;
2041 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002042 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043}
2044
2045
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002046Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002047do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2048{
2049 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002050 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002051 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002052 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2053 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002054
2055 i = 0;
2056 if (striptype != RIGHTSTRIP) {
2057 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2058 i++;
2059 }
2060 }
2061
2062 j = len;
2063 if (striptype != LEFTSTRIP) {
2064 do {
2065 j--;
2066 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2067 j++;
2068 }
2069
2070 if (i == 0 && j == len && PyString_CheckExact(self)) {
2071 Py_INCREF(self);
2072 return (PyObject*)self;
2073 }
2074 else
2075 return PyString_FromStringAndSize(s+i, j-i);
2076}
2077
2078
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002079Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002080do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081{
2082 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002083 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085 i = 0;
2086 if (striptype != RIGHTSTRIP) {
2087 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2088 i++;
2089 }
2090 }
2091
2092 j = len;
2093 if (striptype != LEFTSTRIP) {
2094 do {
2095 j--;
2096 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2097 j++;
2098 }
2099
Tim Peters8fa5dd02001-09-12 02:18:30 +00002100 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101 Py_INCREF(self);
2102 return (PyObject*)self;
2103 }
2104 else
2105 return PyString_FromStringAndSize(s+i, j-i);
2106}
2107
2108
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002109Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002110do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2111{
2112 PyObject *sep = NULL;
2113
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002114 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115 return NULL;
2116
2117 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002118 if (PyString_Check(sep))
2119 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002120#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002121 else if (PyUnicode_Check(sep)) {
2122 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2123 PyObject *res;
2124 if (uniself==NULL)
2125 return NULL;
2126 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2127 striptype, sep);
2128 Py_DECREF(uniself);
2129 return res;
2130 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002131#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002132 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002133#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002134 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002135#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002136 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002137#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002138 STRIPNAME(striptype));
2139 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002140 }
2141
2142 return do_strip(self, striptype);
2143}
2144
2145
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002146PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002147"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148\n\
2149Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002150whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002151If chars is given and not None, remove characters in chars instead.\n\
2152If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153
2154static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002155string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002157 if (PyTuple_GET_SIZE(args) == 0)
2158 return do_strip(self, BOTHSTRIP); /* Common case */
2159 else
2160 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161}
2162
2163
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002164PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002165"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002167Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002168If chars is given and not None, remove characters in chars instead.\n\
2169If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170
2171static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002172string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002174 if (PyTuple_GET_SIZE(args) == 0)
2175 return do_strip(self, LEFTSTRIP); /* Common case */
2176 else
2177 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178}
2179
2180
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002181PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002182"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002184Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002185If chars is given and not None, remove characters in chars instead.\n\
2186If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187
2188static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002189string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002191 if (PyTuple_GET_SIZE(args) == 0)
2192 return do_strip(self, RIGHTSTRIP); /* Common case */
2193 else
2194 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195}
2196
2197
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002198PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199"S.lower() -> string\n\
2200\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002201Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002203/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2204#ifndef _tolower
2205#define _tolower tolower
2206#endif
2207
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002209string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002211 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002212 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002213 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002215 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002216 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002218
2219 s = PyString_AS_STRING(newobj);
2220
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002221 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002222
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002224 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002225 if (isupper(c))
2226 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002228
Anthony Baxtera6286212006-04-11 07:42:36 +00002229 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230}
2231
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002232PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233"S.upper() -> string\n\
2234\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002235Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002237#ifndef _toupper
2238#define _toupper toupper
2239#endif
2240
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002242string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002244 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002245 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002246 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002248 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002249 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002251
2252 s = PyString_AS_STRING(newobj);
2253
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002254 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002255
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002257 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002258 if (islower(c))
2259 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002261
Anthony Baxtera6286212006-04-11 07:42:36 +00002262 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263}
2264
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002265PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266"S.title() -> string\n\
2267\n\
2268Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002269characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002270
2271static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002272string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273{
2274 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002275 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002277 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278
Anthony Baxtera6286212006-04-11 07:42:36 +00002279 newobj = PyString_FromStringAndSize(NULL, n);
2280 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002282 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 for (i = 0; i < n; i++) {
2284 int c = Py_CHARMASK(*s++);
2285 if (islower(c)) {
2286 if (!previous_is_cased)
2287 c = toupper(c);
2288 previous_is_cased = 1;
2289 } else if (isupper(c)) {
2290 if (previous_is_cased)
2291 c = tolower(c);
2292 previous_is_cased = 1;
2293 } else
2294 previous_is_cased = 0;
2295 *s_new++ = c;
2296 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002297 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298}
2299
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002300PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301"S.capitalize() -> string\n\
2302\n\
2303Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002304capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305
2306static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002307string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308{
2309 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002310 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002311 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312
Anthony Baxtera6286212006-04-11 07:42:36 +00002313 newobj = PyString_FromStringAndSize(NULL, n);
2314 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002315 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002316 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 if (0 < n) {
2318 int c = Py_CHARMASK(*s++);
2319 if (islower(c))
2320 *s_new = toupper(c);
2321 else
2322 *s_new = c;
2323 s_new++;
2324 }
2325 for (i = 1; i < n; i++) {
2326 int c = Py_CHARMASK(*s++);
2327 if (isupper(c))
2328 *s_new = tolower(c);
2329 else
2330 *s_new = c;
2331 s_new++;
2332 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002333 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334}
2335
2336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002337PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338"S.count(sub[, start[, end]]) -> int\n\
2339\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002340Return the number of non-overlapping occurrences of substring sub in\n\
2341string S[start:end]. Optional arguments start and end are interpreted\n\
2342as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343
2344static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002345string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002347 PyObject *sub_obj;
2348 const char *str = PyString_AS_STRING(self), *sub;
2349 Py_ssize_t sub_len;
2350 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002352 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2353 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002355
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002356 if (PyString_Check(sub_obj)) {
2357 sub = PyString_AS_STRING(sub_obj);
2358 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002360#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002361 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002362 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002363 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002364 if (count == -1)
2365 return NULL;
2366 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002367 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002368 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002369#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002370 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 return NULL;
2372
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002373 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002374
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002375 return PyInt_FromSsize_t(
2376 stringlib_count(str + start, end - start, sub, sub_len)
2377 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378}
2379
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002380PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381"S.swapcase() -> string\n\
2382\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002384converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385
2386static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002387string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388{
2389 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002390 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002391 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392
Anthony Baxtera6286212006-04-11 07:42:36 +00002393 newobj = PyString_FromStringAndSize(NULL, n);
2394 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002395 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002396 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002397 for (i = 0; i < n; i++) {
2398 int c = Py_CHARMASK(*s++);
2399 if (islower(c)) {
2400 *s_new = toupper(c);
2401 }
2402 else if (isupper(c)) {
2403 *s_new = tolower(c);
2404 }
2405 else
2406 *s_new = c;
2407 s_new++;
2408 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002409 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410}
2411
2412
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002413PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414"S.translate(table [,deletechars]) -> string\n\
2415\n\
2416Return a copy of the string S, where all characters occurring\n\
2417in the optional argument deletechars are removed, and the\n\
2418remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002419translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420
2421static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002422string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002425 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002426 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002428 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002429 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 PyObject *result;
2431 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002434 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437
2438 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002439 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002440 tablen = PyString_GET_SIZE(tableobj);
2441 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002442 else if (tableobj == Py_None) {
2443 table = NULL;
2444 tablen = 256;
2445 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002446#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002448 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 parameter; instead a mapping to None will cause characters
2450 to be deleted. */
2451 if (delobj != NULL) {
2452 PyErr_SetString(PyExc_TypeError,
2453 "deletions are implemented differently for unicode");
2454 return NULL;
2455 }
2456 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2457 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002458#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002459 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002460 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002461
Martin v. Löwis00b61272002-12-12 20:03:19 +00002462 if (tablen != 256) {
2463 PyErr_SetString(PyExc_ValueError,
2464 "translation table must be 256 characters long");
2465 return NULL;
2466 }
2467
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 if (delobj != NULL) {
2469 if (PyString_Check(delobj)) {
2470 del_table = PyString_AS_STRING(delobj);
2471 dellen = PyString_GET_SIZE(delobj);
2472 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002473#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002474 else if (PyUnicode_Check(delobj)) {
2475 PyErr_SetString(PyExc_TypeError,
2476 "deletions are implemented differently for unicode");
2477 return NULL;
2478 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002479#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002480 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2481 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002482 }
2483 else {
2484 del_table = NULL;
2485 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002486 }
2487
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002488 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489 result = PyString_FromStringAndSize((char *)NULL, inlen);
2490 if (result == NULL)
2491 return NULL;
2492 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002493 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002494
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002495 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002496 /* If no deletions are required, use faster code */
2497 for (i = inlen; --i >= 0; ) {
2498 c = Py_CHARMASK(*input++);
2499 if (Py_CHARMASK((*output++ = table[c])) != c)
2500 changed = 1;
2501 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002502 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002503 return result;
2504 Py_DECREF(result);
2505 Py_INCREF(input_obj);
2506 return input_obj;
2507 }
2508
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002509 if (table == NULL) {
2510 for (i = 0; i < 256; i++)
2511 trans_table[i] = Py_CHARMASK(i);
2512 } else {
2513 for (i = 0; i < 256; i++)
2514 trans_table[i] = Py_CHARMASK(table[i]);
2515 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002516
2517 for (i = 0; i < dellen; i++)
2518 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2519
2520 for (i = inlen; --i >= 0; ) {
2521 c = Py_CHARMASK(*input++);
2522 if (trans_table[c] != -1)
2523 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2524 continue;
2525 changed = 1;
2526 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002527 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002528 Py_DECREF(result);
2529 Py_INCREF(input_obj);
2530 return input_obj;
2531 }
2532 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002533 if (inlen > 0)
2534 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535 return result;
2536}
2537
2538
/* Search directions.  REVERSE is parenthesised so the negative constant
   stays a single operand wherever the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first occurrence of byte c within the first
   target_len bytes of target, or NULL if there is none (thin wrapper
   around memchr that yields a char pointer). */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2546
2547/* String ops must return a string. */
2548/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002549Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002550return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002551{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002552 if (PyString_CheckExact(self)) {
2553 Py_INCREF(self);
2554 return self;
2555 }
2556 return (PyStringObject *)PyString_FromStringAndSize(
2557 PyString_AS_STRING(self),
2558 PyString_GET_SIZE(self));
2559}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002560
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002561Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002562countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563{
2564 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002565 const char *start=target;
2566 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002567
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568 while ( (start=findchar(start, end-start, c)) != NULL ) {
2569 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002570 if (count >= maxcount)
2571 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002572 start += 1;
2573 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002574 return count;
2575}
2576
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002577Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002578findstring(const char *target, Py_ssize_t target_len,
2579 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002580 Py_ssize_t start,
2581 Py_ssize_t end,
2582 int direction)
2583{
2584 if (start < 0) {
2585 start += target_len;
2586 if (start < 0)
2587 start = 0;
2588 }
2589 if (end > target_len) {
2590 end = target_len;
2591 } else if (end < 0) {
2592 end += target_len;
2593 if (end < 0)
2594 end = 0;
2595 }
2596
2597 /* zero-length substrings always match at the first attempt */
2598 if (pattern_len == 0)
2599 return (direction > 0) ? start : end;
2600
2601 end -= pattern_len;
2602
2603 if (direction < 0) {
2604 for (; end >= start; end--)
2605 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2606 return end;
2607 } else {
2608 for (; start <= end; start++)
2609 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2610 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002611 }
2612 return -1;
2613}
2614
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002615Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002616countstring(const char *target, Py_ssize_t target_len,
2617 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002618 Py_ssize_t start,
2619 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002620 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002621{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002622 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002623
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002624 if (start < 0) {
2625 start += target_len;
2626 if (start < 0)
2627 start = 0;
2628 }
2629 if (end > target_len) {
2630 end = target_len;
2631 } else if (end < 0) {
2632 end += target_len;
2633 if (end < 0)
2634 end = 0;
2635 }
2636
2637 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002638 if (pattern_len == 0 || maxcount == 0) {
2639 if (target_len+1 < maxcount)
2640 return target_len+1;
2641 return maxcount;
2642 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002643
2644 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002645 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002646 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002647 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2648 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002649 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002650 end -= pattern_len-1;
2651 }
2652 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002653 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002654 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2655 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002656 if (--maxcount <= 0)
2657 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002658 start += pattern_len-1;
2659 }
2660 }
2661 return count;
2662}
2663
2664
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002665/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666
2667/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002668Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002669replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002670 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002671 Py_ssize_t maxcount)
2672{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002673 char *self_s, *result_s;
2674 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002675 Py_ssize_t count, i, product;
2676 PyStringObject *result;
2677
2678 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002679
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002680 /* 1 at the end plus 1 after every character */
2681 count = self_len+1;
2682 if (maxcount < count)
2683 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002684
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002685 /* Check for overflow */
2686 /* result_len = count * to_len + self_len; */
2687 product = count * to_len;
2688 if (product / to_len != count) {
2689 PyErr_SetString(PyExc_OverflowError,
2690 "replace string is too long");
2691 return NULL;
2692 }
2693 result_len = product + self_len;
2694 if (result_len < 0) {
2695 PyErr_SetString(PyExc_OverflowError,
2696 "replace string is too long");
2697 return NULL;
2698 }
2699
2700 if (! (result = (PyStringObject *)
2701 PyString_FromStringAndSize(NULL, result_len)) )
2702 return NULL;
2703
2704 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002705 result_s = PyString_AS_STRING(result);
2706
2707 /* TODO: special case single character, which doesn't need memcpy */
2708
2709 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002710 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002711 result_s += to_len;
2712 count -= 1;
2713
2714 for (i=0; i<count; i++) {
2715 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002716 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 result_s += to_len;
2718 }
2719
2720 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002721 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002722
2723 return result;
2724}
2725
2726/* Special case for deleting a single character */
2727/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002728Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002729replace_delete_single_character(PyStringObject *self,
2730 char from_c, Py_ssize_t maxcount)
2731{
2732 char *self_s, *result_s;
2733 char *start, *next, *end;
2734 Py_ssize_t self_len, result_len;
2735 Py_ssize_t count;
2736 PyStringObject *result;
2737
2738 self_len = PyString_GET_SIZE(self);
2739 self_s = PyString_AS_STRING(self);
2740
Andrew Dalke51324072006-05-26 20:25:22 +00002741 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002742 if (count == 0) {
2743 return return_self(self);
2744 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002745
2746 result_len = self_len - count; /* from_len == 1 */
2747 assert(result_len>=0);
2748
2749 if ( (result = (PyStringObject *)
2750 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2751 return NULL;
2752 result_s = PyString_AS_STRING(result);
2753
2754 start = self_s;
2755 end = self_s + self_len;
2756 while (count-- > 0) {
2757 next = findchar(start, end-start, from_c);
2758 if (next == NULL)
2759 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002760 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002761 result_s += (next-start);
2762 start = next+1;
2763 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002764 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002765
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002766 return result;
2767}
2768
2769/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2770
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002771Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002772replace_delete_substring(PyStringObject *self,
2773 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002774 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002775 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002776 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002777 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778 Py_ssize_t count, offset;
2779 PyStringObject *result;
2780
2781 self_len = PyString_GET_SIZE(self);
2782 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783
2784 count = countstring(self_s, self_len,
2785 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002786 0, self_len, 1,
2787 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788
2789 if (count == 0) {
2790 /* no matches */
2791 return return_self(self);
2792 }
2793
2794 result_len = self_len - (count * from_len);
2795 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002796
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 if ( (result = (PyStringObject *)
2798 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2799 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002800
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002801 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002802
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002803 start = self_s;
2804 end = self_s + self_len;
2805 while (count-- > 0) {
2806 offset = findstring(start, end-start,
2807 from_s, from_len,
2808 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002809 if (offset == -1)
2810 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002811 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002812
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002813 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002814
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815 result_s += (next-start);
2816 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002817 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002818 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002820}
2821
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002823Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824replace_single_character_in_place(PyStringObject *self,
2825 char from_c, char to_c,
2826 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002827{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002828 char *self_s, *result_s, *start, *end, *next;
2829 Py_ssize_t self_len;
2830 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002831
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 /* The result string will be the same size */
2833 self_s = PyString_AS_STRING(self);
2834 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002835
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002837
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002838 if (next == NULL) {
2839 /* No matches; return the original string */
2840 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002841 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002842
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002843 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002844 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002845 if (result == NULL)
2846 return NULL;
2847 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002848 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002849
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002850 /* change everything in-place, starting with this one */
2851 start = result_s + (next-self_s);
2852 *start = to_c;
2853 start++;
2854 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002855
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002856 while (--maxcount > 0) {
2857 next = findchar(start, end-start, from_c);
2858 if (next == NULL)
2859 break;
2860 *next = to_c;
2861 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002862 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002863
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002865}
2866
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002868Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002870 const char *from_s, Py_ssize_t from_len,
2871 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 Py_ssize_t maxcount)
2873{
2874 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002875 char *self_s;
2876 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002877 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002878
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002879 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002880
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881 self_s = PyString_AS_STRING(self);
2882 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002883
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002884 offset = findstring(self_s, self_len,
2885 from_s, from_len,
2886 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002887 if (offset == -1) {
2888 /* No matches; return the original string */
2889 return return_self(self);
2890 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002891
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002892 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002893 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894 if (result == NULL)
2895 return NULL;
2896 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002897 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002898
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002899 /* change everything in-place, starting with this one */
2900 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002901 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002902 start += from_len;
2903 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002904
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002905 while ( --maxcount > 0) {
2906 offset = findstring(start, end-start,
2907 from_s, from_len,
2908 0, end-start, FORWARD);
2909 if (offset==-1)
2910 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002911 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912 start += offset+from_len;
2913 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002914
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002915 return result;
2916}
2917
2918/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002919Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002920replace_single_character(PyStringObject *self,
2921 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002922 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923 Py_ssize_t maxcount)
2924{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002925 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002926 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002927 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002928 Py_ssize_t count, product;
2929 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002930
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002931 self_s = PyString_AS_STRING(self);
2932 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002933
Andrew Dalke51324072006-05-26 20:25:22 +00002934 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002935 if (count == 0) {
2936 /* no matches, return unchanged */
2937 return return_self(self);
2938 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002939
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002940 /* use the difference between current and new, hence the "-1" */
2941 /* result_len = self_len + count * (to_len-1) */
2942 product = count * (to_len-1);
2943 if (product / (to_len-1) != count) {
2944 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2945 return NULL;
2946 }
2947 result_len = self_len + product;
2948 if (result_len < 0) {
2949 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2950 return NULL;
2951 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002952
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002953 if ( (result = (PyStringObject *)
2954 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2955 return NULL;
2956 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002957
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002958 start = self_s;
2959 end = self_s + self_len;
2960 while (count-- > 0) {
2961 next = findchar(start, end-start, from_c);
2962 if (next == NULL)
2963 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002964
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002965 if (next == start) {
2966 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002967 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002968 result_s += to_len;
2969 start += 1;
2970 } else {
2971 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002972 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002974 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002975 result_s += to_len;
2976 start = next+1;
2977 }
2978 }
2979 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002980 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002981
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002982 return result;
2983}
2984
2985/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002986Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002987replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002988 const char *from_s, Py_ssize_t from_len,
2989 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002990 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002991 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002992 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002993 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002994 Py_ssize_t count, offset, product;
2995 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002996
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002997 self_s = PyString_AS_STRING(self);
2998 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002999
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003000 count = countstring(self_s, self_len,
3001 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00003002 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003003 if (count == 0) {
3004 /* no matches, return unchanged */
3005 return return_self(self);
3006 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003007
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003008 /* Check for overflow */
3009 /* result_len = self_len + count * (to_len-from_len) */
3010 product = count * (to_len-from_len);
3011 if (product / (to_len-from_len) != count) {
3012 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3013 return NULL;
3014 }
3015 result_len = self_len + product;
3016 if (result_len < 0) {
3017 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3018 return NULL;
3019 }
Neal Norwitza7edb112006-07-30 06:59:13 +00003020
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003021 if ( (result = (PyStringObject *)
3022 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3023 return NULL;
3024 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00003025
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003026 start = self_s;
3027 end = self_s + self_len;
3028 while (count-- > 0) {
3029 offset = findstring(start, end-start,
3030 from_s, from_len,
3031 0, end-start, FORWARD);
3032 if (offset == -1)
3033 break;
3034 next = start+offset;
3035 if (next == start) {
3036 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003037 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003038 result_s += to_len;
3039 start += from_len;
3040 } else {
3041 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003042 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003043 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003044 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003045 result_s += to_len;
3046 start = next+from_len;
3047 }
3048 }
3049 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003050 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00003051
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003052 return result;
3053}
3054
3055
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003056Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003058 const char *from_s, Py_ssize_t from_len,
3059 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003060 Py_ssize_t maxcount)
3061{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003062 if (maxcount < 0) {
3063 maxcount = PY_SSIZE_T_MAX;
3064 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3065 /* nothing to do; return the original string */
3066 return return_self(self);
3067 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003068
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003069 if (maxcount == 0 ||
3070 (from_len == 0 && to_len == 0)) {
3071 /* nothing to do; return the original string */
3072 return return_self(self);
3073 }
3074
3075 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003076
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 if (from_len == 0) {
3078 /* insert the 'to' string everywhere. */
3079 /* >>> "Python".replace("", ".") */
3080 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003081 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003082 }
3083
3084 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3085 /* point for an empty self string to generate a non-empty string */
3086 /* Special case so the remaining code always gets a non-empty string */
3087 if (PyString_GET_SIZE(self) == 0) {
3088 return return_self(self);
3089 }
3090
3091 if (to_len == 0) {
3092 /* delete all occurances of 'from' string */
3093 if (from_len == 1) {
3094 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003095 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003096 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003097 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003098 }
3099 }
3100
3101 /* Handle special case where both strings have the same length */
3102
3103 if (from_len == to_len) {
3104 if (from_len == 1) {
3105 return replace_single_character_in_place(
3106 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003107 from_s[0],
3108 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003109 maxcount);
3110 } else {
3111 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003112 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003113 }
3114 }
3115
3116 /* Otherwise use the more generic algorithms */
3117 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003118 return replace_single_character(self, from_s[0],
3119 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003120 } else {
3121 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003122 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003123 }
3124}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003125
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003126PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003127"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003128\n\
3129Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003130old replaced by new. If the optional argument count is\n\
3131given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003132
3133static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003134string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003135{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003136 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003137 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003138 const char *from_s, *to_s;
3139 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003140
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003141 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003142 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003144 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003145 from_s = PyString_AS_STRING(from);
3146 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003148#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003149 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003150 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003151 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003152#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003153 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 return NULL;
3155
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003156 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003157 to_s = PyString_AS_STRING(to);
3158 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003160#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003161 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003162 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003163 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003164#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003165 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003166 return NULL;
3167
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003168 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003169 from_s, from_len,
3170 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003171}
3172
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003173/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003174
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003175/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003176 * against substr, using the start and end arguments. Returns
3177 * -1 on error, 0 if not found and 1 if found.
3178 */
3179Py_LOCAL(int)
3180_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3181 Py_ssize_t end, int direction)
3182{
3183 Py_ssize_t len = PyString_GET_SIZE(self);
3184 Py_ssize_t slen;
3185 const char* sub;
3186 const char* str;
3187
3188 if (PyString_Check(substr)) {
3189 sub = PyString_AS_STRING(substr);
3190 slen = PyString_GET_SIZE(substr);
3191 }
3192#ifdef Py_USING_UNICODE
3193 else if (PyUnicode_Check(substr))
3194 return PyUnicode_Tailmatch((PyObject *)self,
3195 substr, start, end, direction);
3196#endif
3197 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3198 return -1;
3199 str = PyString_AS_STRING(self);
3200
3201 string_adjust_indices(&start, &end, len);
3202
3203 if (direction < 0) {
3204 /* startswith */
3205 if (start+slen > len)
3206 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003207 } else {
3208 /* endswith */
3209 if (end-start < slen || start > len)
3210 return 0;
3211
3212 if (end-slen > start)
3213 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003214 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003215 if (end-start >= slen)
3216 return ! memcmp(str+start, sub, slen);
3217 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003218}
3219
3220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003221PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003222"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003223\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003224Return True if S starts with the specified prefix, False otherwise.\n\
3225With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003226With optional end, stop comparing S at that position.\n\
3227prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003228
3229static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003230string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003231{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003232 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003233 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003234 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003235 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003236
Guido van Rossumc6821402000-05-08 14:08:05 +00003237 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3238 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003239 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003240 if (PyTuple_Check(subobj)) {
3241 Py_ssize_t i;
3242 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3243 result = _string_tailmatch(self,
3244 PyTuple_GET_ITEM(subobj, i),
3245 start, end, -1);
3246 if (result == -1)
3247 return NULL;
3248 else if (result) {
3249 Py_RETURN_TRUE;
3250 }
3251 }
3252 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003253 }
Georg Brandl24250812006-06-09 18:45:48 +00003254 result = _string_tailmatch(self, subobj, start, end, -1);
3255 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003256 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003257 else
Georg Brandl24250812006-06-09 18:45:48 +00003258 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003259}
3260
3261
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003262PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003263"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003264\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003265Return True if S ends with the specified suffix, False otherwise.\n\
3266With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003267With optional end, stop comparing S at that position.\n\
3268suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003269
3270static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003271string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003272{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003273 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003274 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003275 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003276 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003277
Guido van Rossumc6821402000-05-08 14:08:05 +00003278 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3279 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003280 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003281 if (PyTuple_Check(subobj)) {
3282 Py_ssize_t i;
3283 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3284 result = _string_tailmatch(self,
3285 PyTuple_GET_ITEM(subobj, i),
3286 start, end, +1);
3287 if (result == -1)
3288 return NULL;
3289 else if (result) {
3290 Py_RETURN_TRUE;
3291 }
3292 }
3293 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003294 }
Georg Brandl24250812006-06-09 18:45:48 +00003295 result = _string_tailmatch(self, subobj, start, end, +1);
3296 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003297 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003298 else
Georg Brandl24250812006-06-09 18:45:48 +00003299 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003300}
3301
3302
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003303PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003304"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003305\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003306Encodes S using the codec registered for encoding. encoding defaults\n\
3307to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003308handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003309a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3310'xmlcharrefreplace' as well as any other name registered with\n\
3311codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003312
3313static PyObject *
3314string_encode(PyStringObject *self, PyObject *args)
3315{
3316 char *encoding = NULL;
3317 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003318 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003319
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003320 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3321 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003322 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003323 if (v == NULL)
3324 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003325 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3326 PyErr_Format(PyExc_TypeError,
3327 "encoder did not return a string/unicode object "
3328 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003329 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003330 Py_DECREF(v);
3331 return NULL;
3332 }
3333 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003334
3335 onError:
3336 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003337}
3338
3339
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003340PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003341"S.decode([encoding[,errors]]) -> object\n\
3342\n\
3343Decodes S using the codec registered for encoding. encoding defaults\n\
3344to the default encoding. errors may be given to set a different error\n\
3345handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003346a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3347as well as any other name registerd with codecs.register_error that is\n\
3348able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003349
3350static PyObject *
3351string_decode(PyStringObject *self, PyObject *args)
3352{
3353 char *encoding = NULL;
3354 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003355 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003356
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003357 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3358 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003359 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003360 if (v == NULL)
3361 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003362 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3363 PyErr_Format(PyExc_TypeError,
3364 "decoder did not return a string/unicode object "
3365 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003366 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003367 Py_DECREF(v);
3368 return NULL;
3369 }
3370 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003371
3372 onError:
3373 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003374}
3375
3376
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003377PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003378"S.expandtabs([tabsize]) -> string\n\
3379\n\
3380Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003381If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382
3383static PyObject*
3384string_expandtabs(PyStringObject *self, PyObject *args)
3385{
Guido van Rossum5bdff602008-03-11 21:18:06 +00003386 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 char *q;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003388 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003389 PyObject *u;
3390 int tabsize = 8;
3391
3392 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3393 return NULL;
3394
Thomas Wouters7e474022000-07-16 12:04:32 +00003395 /* First pass: determine size of output string */
Guido van Rossum5bdff602008-03-11 21:18:06 +00003396 i = 0; /* chars up to and including most recent \n or \r */
3397 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3398 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399 for (p = PyString_AS_STRING(self); p < e; p++)
3400 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003401 if (tabsize > 0) {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003402 incr = tabsize - (j % tabsize);
3403 if (j > PY_SSIZE_T_MAX - incr)
3404 goto overflow1;
3405 j += incr;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003406 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003407 }
3408 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003409 if (j > PY_SSIZE_T_MAX - 1)
3410 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411 j++;
3412 if (*p == '\n' || *p == '\r') {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003413 if (i > PY_SSIZE_T_MAX - j)
3414 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003415 i += j;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003416 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003417 }
3418 }
3419
Guido van Rossum5bdff602008-03-11 21:18:06 +00003420 if (i > PY_SSIZE_T_MAX - j)
3421 goto overflow1;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003422
Guido van Rossum4c08d552000-03-10 22:55:18 +00003423 /* Second pass: create output string and fill it */
3424 u = PyString_FromStringAndSize(NULL, i + j);
3425 if (!u)
3426 return NULL;
3427
Guido van Rossum5bdff602008-03-11 21:18:06 +00003428 j = 0; /* same as in first pass */
3429 q = PyString_AS_STRING(u); /* next output char */
3430 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431
3432 for (p = PyString_AS_STRING(self); p < e; p++)
3433 if (*p == '\t') {
3434 if (tabsize > 0) {
3435 i = tabsize - (j % tabsize);
3436 j += i;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003437 while (i--) {
3438 if (q >= qe)
3439 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440 *q++ = ' ';
Guido van Rossum5bdff602008-03-11 21:18:06 +00003441 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003442 }
3443 }
3444 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003445 if (q >= qe)
3446 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003447 *q++ = *p;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003448 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003449 if (*p == '\n' || *p == '\r')
3450 j = 0;
3451 }
3452
3453 return u;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003454
3455 overflow2:
3456 Py_DECREF(u);
3457 overflow1:
3458 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3459 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003460}
3461
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003462Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003463pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003464{
3465 PyObject *u;
3466
3467 if (left < 0)
3468 left = 0;
3469 if (right < 0)
3470 right = 0;
3471
Tim Peters8fa5dd02001-09-12 02:18:30 +00003472 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003473 Py_INCREF(self);
3474 return (PyObject *)self;
3475 }
3476
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003477 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003478 left + PyString_GET_SIZE(self) + right);
3479 if (u) {
3480 if (left)
3481 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003482 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003483 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003484 PyString_GET_SIZE(self));
3485 if (right)
3486 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3487 fill, right);
3488 }
3489
3490 return u;
3491}
3492
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003493PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003494"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003495"\n"
3496"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003497"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003498
3499static PyObject *
3500string_ljust(PyStringObject *self, PyObject *args)
3501{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003502 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003503 char fillchar = ' ';
3504
Thomas Wouters4abb3662006-04-19 14:50:15 +00003505 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003506 return NULL;
3507
Tim Peters8fa5dd02001-09-12 02:18:30 +00003508 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509 Py_INCREF(self);
3510 return (PyObject*) self;
3511 }
3512
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003513 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003514}
3515
3516
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003517PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003518"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003519"\n"
3520"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003521"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003522
3523static PyObject *
3524string_rjust(PyStringObject *self, PyObject *args)
3525{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003526 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003527 char fillchar = ' ';
3528
Thomas Wouters4abb3662006-04-19 14:50:15 +00003529 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003530 return NULL;
3531
Tim Peters8fa5dd02001-09-12 02:18:30 +00003532 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003533 Py_INCREF(self);
3534 return (PyObject*) self;
3535 }
3536
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003537 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003538}
3539
3540
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003541PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003542"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003543"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003544"Return S centered in a string of length width. Padding is\n"
3545"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546
3547static PyObject *
3548string_center(PyStringObject *self, PyObject *args)
3549{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003550 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003551 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003552 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003553
Thomas Wouters4abb3662006-04-19 14:50:15 +00003554 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003555 return NULL;
3556
Tim Peters8fa5dd02001-09-12 02:18:30 +00003557 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558 Py_INCREF(self);
3559 return (PyObject*) self;
3560 }
3561
3562 marg = width - PyString_GET_SIZE(self);
3563 left = marg / 2 + (marg & width & 1);
3564
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003565 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566}
3567
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003568PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003569"S.zfill(width) -> string\n"
3570"\n"
3571"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003572"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003573
3574static PyObject *
3575string_zfill(PyStringObject *self, PyObject *args)
3576{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003577 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003578 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003579 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003580 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003581
Thomas Wouters4abb3662006-04-19 14:50:15 +00003582 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003583 return NULL;
3584
3585 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003586 if (PyString_CheckExact(self)) {
3587 Py_INCREF(self);
3588 return (PyObject*) self;
3589 }
3590 else
3591 return PyString_FromStringAndSize(
3592 PyString_AS_STRING(self),
3593 PyString_GET_SIZE(self)
3594 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003595 }
3596
3597 fill = width - PyString_GET_SIZE(self);
3598
3599 s = pad(self, fill, 0, '0');
3600
3601 if (s == NULL)
3602 return NULL;
3603
3604 p = PyString_AS_STRING(s);
3605 if (p[fill] == '+' || p[fill] == '-') {
3606 /* move sign to beginning of string */
3607 p[0] = p[fill];
3608 p[fill] = '0';
3609 }
3610
3611 return (PyObject*) s;
3612}
3613
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003614PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003615"S.isspace() -> bool\n\
3616\n\
3617Return True if all characters in S are whitespace\n\
3618and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619
3620static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003621string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622{
Fred Drakeba096332000-07-09 07:04:36 +00003623 register const unsigned char *p
3624 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003625 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627 /* Shortcut for single character strings */
3628 if (PyString_GET_SIZE(self) == 1 &&
3629 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003632 /* Special case for empty strings */
3633 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003635
Guido van Rossum4c08d552000-03-10 22:55:18 +00003636 e = p + PyString_GET_SIZE(self);
3637 for (; p < e; p++) {
3638 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003639 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642}
3643
3644
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003645PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003646"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003647\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003648Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003649and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003650
3651static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003652string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003653{
Fred Drakeba096332000-07-09 07:04:36 +00003654 register const unsigned char *p
3655 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003656 register const unsigned char *e;
3657
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003658 /* Shortcut for single character strings */
3659 if (PyString_GET_SIZE(self) == 1 &&
3660 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003662
3663 /* Special case for empty strings */
3664 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003666
3667 e = p + PyString_GET_SIZE(self);
3668 for (; p < e; p++) {
3669 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003671 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003673}
3674
3675
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003676PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003678\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003679Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003680and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003681
3682static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003683string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003684{
Fred Drakeba096332000-07-09 07:04:36 +00003685 register const unsigned char *p
3686 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003687 register const unsigned char *e;
3688
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003689 /* Shortcut for single character strings */
3690 if (PyString_GET_SIZE(self) == 1 &&
3691 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003693
3694 /* Special case for empty strings */
3695 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003696 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003697
3698 e = p + PyString_GET_SIZE(self);
3699 for (; p < e; p++) {
3700 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003702 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003704}
3705
3706
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003707PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003710Return True if all characters in S are digits\n\
3711and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712
3713static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003714string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715{
Fred Drakeba096332000-07-09 07:04:36 +00003716 register const unsigned char *p
3717 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003718 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 /* Shortcut for single character strings */
3721 if (PyString_GET_SIZE(self) == 1 &&
3722 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003725 /* Special case for empty strings */
3726 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003728
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 e = p + PyString_GET_SIZE(self);
3730 for (; p < e; p++) {
3731 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003732 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003734 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735}
3736
3737
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003738PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003739"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003741Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003742at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743
3744static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003745string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746{
Fred Drakeba096332000-07-09 07:04:36 +00003747 register const unsigned char *p
3748 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003749 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750 int cased;
3751
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752 /* Shortcut for single character strings */
3753 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003756 /* Special case for empty strings */
3757 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003758 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003759
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 e = p + PyString_GET_SIZE(self);
3761 cased = 0;
3762 for (; p < e; p++) {
3763 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003764 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765 else if (!cased && islower(*p))
3766 cased = 1;
3767 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003768 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769}
3770
3771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003772PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003773"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003775Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003776at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777
3778static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003779string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003780{
Fred Drakeba096332000-07-09 07:04:36 +00003781 register const unsigned char *p
3782 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003783 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784 int cased;
3785
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 /* Shortcut for single character strings */
3787 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003788 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003790 /* Special case for empty strings */
3791 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003792 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003793
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794 e = p + PyString_GET_SIZE(self);
3795 cased = 0;
3796 for (; p < e; p++) {
3797 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003798 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799 else if (!cased && isupper(*p))
3800 cased = 1;
3801 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003802 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003803}
3804
3805
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003806PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003807"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003809Return True if S is a titlecased string and there is at least one\n\
3810character in S, i.e. uppercase characters may only follow uncased\n\
3811characters and lowercase characters only cased ones. Return False\n\
3812otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813
3814static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003815string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816{
Fred Drakeba096332000-07-09 07:04:36 +00003817 register const unsigned char *p
3818 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003819 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820 int cased, previous_is_cased;
3821
Guido van Rossum4c08d552000-03-10 22:55:18 +00003822 /* Shortcut for single character strings */
3823 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003824 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003825
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003826 /* Special case for empty strings */
3827 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003828 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003829
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830 e = p + PyString_GET_SIZE(self);
3831 cased = 0;
3832 previous_is_cased = 0;
3833 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003834 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003835
3836 if (isupper(ch)) {
3837 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003838 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003839 previous_is_cased = 1;
3840 cased = 1;
3841 }
3842 else if (islower(ch)) {
3843 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003844 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003845 previous_is_cased = 1;
3846 cased = 1;
3847 }
3848 else
3849 previous_is_cased = 0;
3850 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003851 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003852}
3853
3854
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003855PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003856"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003857\n\
3858Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003859Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003860is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003861
Guido van Rossum4c08d552000-03-10 22:55:18 +00003862static PyObject*
3863string_splitlines(PyStringObject *self, PyObject *args)
3864{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003865 register Py_ssize_t i;
3866 register Py_ssize_t j;
3867 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003868 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003869 PyObject *list;
3870 PyObject *str;
3871 char *data;
3872
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003873 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003874 return NULL;
3875
3876 data = PyString_AS_STRING(self);
3877 len = PyString_GET_SIZE(self);
3878
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003879 /* This does not use the preallocated list because splitlines is
3880 usually run with hundreds of newlines. The overhead of
3881 switching between PyList_SET_ITEM and append causes about a
3882 2-3% slowdown for that common case. A smarter implementation
3883 could move the if check out, so the SET_ITEMs are done first
3884 and the appends only done when the prealloc buffer is full.
3885 That's too much work for little gain.*/
3886
Guido van Rossum4c08d552000-03-10 22:55:18 +00003887 list = PyList_New(0);
3888 if (!list)
3889 goto onError;
3890
3891 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003892 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003893
Guido van Rossum4c08d552000-03-10 22:55:18 +00003894 /* Find a line and append it */
3895 while (i < len && data[i] != '\n' && data[i] != '\r')
3896 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003897
3898 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003899 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003900 if (i < len) {
3901 if (data[i] == '\r' && i + 1 < len &&
3902 data[i+1] == '\n')
3903 i += 2;
3904 else
3905 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003906 if (keepends)
3907 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003908 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003909 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003910 j = i;
3911 }
3912 if (j < len) {
3913 SPLIT_APPEND(data, j, len);
3914 }
3915
3916 return list;
3917
3918 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003919 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003920 return NULL;
3921}
3922
3923#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003924#undef SPLIT_ADD
3925#undef MAX_PREALLOC
3926#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003927
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003928static PyObject *
3929string_getnewargs(PyStringObject *v)
3930{
Christian Heimese93237d2007-12-19 02:37:44 +00003931 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003932}
3933
Eric Smitha9f7d622008-02-17 19:46:49 +00003934
3935#include "stringlib/string_format.h"
3936
3937PyDoc_STRVAR(format__doc__,
3938"S.format(*args, **kwargs) -> unicode\n\
3939\n\
3940");
3941
3942PyDoc_STRVAR(p_format__doc__,
3943"S.__format__(format_spec) -> unicode\n\
3944\n\
3945");
3946
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003947
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003948static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003949string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003950 /* Counterparts of the obsolete stropmodule functions; except
3951 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003952 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3953 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003954 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003955 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3956 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003957 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3958 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3959 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3960 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3961 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3962 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3963 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003964 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3965 capitalize__doc__},
3966 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3967 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3968 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003969 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003970 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3971 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3972 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3973 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3974 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3975 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3976 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003977 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3978 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003979 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3980 startswith__doc__},
3981 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3982 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3983 swapcase__doc__},
3984 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3985 translate__doc__},
3986 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3987 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3988 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3989 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3990 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Eric Smitha9f7d622008-02-17 19:46:49 +00003991 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3992 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3993 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3994 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003995 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3996 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3997 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3998 expandtabs__doc__},
3999 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4000 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00004001 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004002 {NULL, NULL} /* sentinel */
4003};
4004
Jeremy Hylton938ace62002-07-17 16:30:39 +00004005static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00004006str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4007
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004008static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004009string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004010{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004012 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004013
Guido van Rossumae960af2001-08-30 03:11:59 +00004014 if (type != &PyString_Type)
4015 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004016 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4017 return NULL;
4018 if (x == NULL)
4019 return PyString_FromString("");
4020 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004021}
4022
Guido van Rossumae960af2001-08-30 03:11:59 +00004023static PyObject *
4024str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4025{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004026 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004027 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004028
4029 assert(PyType_IsSubtype(type, &PyString_Type));
4030 tmp = string_new(&PyString_Type, args, kwds);
4031 if (tmp == NULL)
4032 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004033 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004034 n = PyString_GET_SIZE(tmp);
4035 pnew = type->tp_alloc(type, n);
4036 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004037 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004038 ((PyStringObject *)pnew)->ob_shash =
4039 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004040 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004041 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004042 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004043 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004044}
4045
Guido van Rossumcacfc072002-05-24 19:01:59 +00004046static PyObject *
4047basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4048{
4049 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004050 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004051 return NULL;
4052}
4053
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004054static PyObject *
4055string_mod(PyObject *v, PyObject *w)
4056{
4057 if (!PyString_Check(v)) {
4058 Py_INCREF(Py_NotImplemented);
4059 return Py_NotImplemented;
4060 }
4061 return PyString_Format(v, w);
4062}
4063
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004064PyDoc_STRVAR(basestring_doc,
4065"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004066
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004067static PyNumberMethods string_as_number = {
4068 0, /*nb_add*/
4069 0, /*nb_subtract*/
4070 0, /*nb_multiply*/
4071 0, /*nb_divide*/
4072 string_mod, /*nb_remainder*/
4073};
4074
4075
Guido van Rossumcacfc072002-05-24 19:01:59 +00004076PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004077 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004078 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004079 0,
4080 0,
4081 0, /* tp_dealloc */
4082 0, /* tp_print */
4083 0, /* tp_getattr */
4084 0, /* tp_setattr */
4085 0, /* tp_compare */
4086 0, /* tp_repr */
4087 0, /* tp_as_number */
4088 0, /* tp_as_sequence */
4089 0, /* tp_as_mapping */
4090 0, /* tp_hash */
4091 0, /* tp_call */
4092 0, /* tp_str */
4093 0, /* tp_getattro */
4094 0, /* tp_setattro */
4095 0, /* tp_as_buffer */
4096 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4097 basestring_doc, /* tp_doc */
4098 0, /* tp_traverse */
4099 0, /* tp_clear */
4100 0, /* tp_richcompare */
4101 0, /* tp_weaklistoffset */
4102 0, /* tp_iter */
4103 0, /* tp_iternext */
4104 0, /* tp_methods */
4105 0, /* tp_members */
4106 0, /* tp_getset */
4107 &PyBaseObject_Type, /* tp_base */
4108 0, /* tp_dict */
4109 0, /* tp_descr_get */
4110 0, /* tp_descr_set */
4111 0, /* tp_dictoffset */
4112 0, /* tp_init */
4113 0, /* tp_alloc */
4114 basestring_new, /* tp_new */
4115 0, /* tp_free */
4116};
4117
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004118PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004119"str(object) -> string\n\
4120\n\
4121Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004122If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004123
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004124PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004125 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004126 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004127 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004128 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004129 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004130 (printfunc)string_print, /* tp_print */
4131 0, /* tp_getattr */
4132 0, /* tp_setattr */
4133 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004134 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004135 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004136 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004137 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004138 (hashfunc)string_hash, /* tp_hash */
4139 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004140 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004141 PyObject_GenericGetAttr, /* tp_getattro */
4142 0, /* tp_setattro */
4143 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004144 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Christian Heimes1a6387e2008-03-26 12:49:49 +00004145 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4146 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004147 string_doc, /* tp_doc */
4148 0, /* tp_traverse */
4149 0, /* tp_clear */
4150 (richcmpfunc)string_richcompare, /* tp_richcompare */
4151 0, /* tp_weaklistoffset */
4152 0, /* tp_iter */
4153 0, /* tp_iternext */
4154 string_methods, /* tp_methods */
4155 0, /* tp_members */
4156 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004157 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004158 0, /* tp_dict */
4159 0, /* tp_descr_get */
4160 0, /* tp_descr_set */
4161 0, /* tp_dictoffset */
4162 0, /* tp_init */
4163 0, /* tp_alloc */
4164 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004165 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004166};
4167
4168void
Fred Drakeba096332000-07-09 07:04:36 +00004169PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004170{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004171 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004172 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004173 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004174 if (w == NULL || !PyString_Check(*pv)) {
4175 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004176 *pv = NULL;
4177 return;
4178 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004179 v = string_concat((PyStringObject *) *pv, w);
4180 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004181 *pv = v;
4182}
4183
Guido van Rossum013142a1994-08-30 08:19:36 +00004184void
Fred Drakeba096332000-07-09 07:04:36 +00004185PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004186{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004187 PyString_Concat(pv, w);
4188 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004189}
4190
4191
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004192/* The following function breaks the notion that strings are immutable:
4193 it changes the size of a string. We get away with this only if there
4194 is only one module referencing the object. You can also think of it
4195 as creating a new string object and destroying the old one, only
4196 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004197 already be known to some other part of the code...
4198 Note that if there's not enough memory to resize the string, the original
4199 string object at *pv is deallocated, *pv is set to NULL, an "out of
4200 memory" exception is set, and -1 is returned. Else (on success) 0 is
4201 returned, and the value in *pv may or may not be the same as on input.
4202 As always, an extra byte is allocated for a trailing \0 byte (newsize
4203 does *not* include that), and a trailing \0 byte is stored.
4204*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004205
4206int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004207_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004208{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004209 register PyObject *v;
4210 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004211 v = *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004212 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004213 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004214 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004215 Py_DECREF(v);
4216 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004217 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004218 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004219 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004220 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004221 _Py_ForgetReference(v);
4222 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004223 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004224 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004225 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004226 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004227 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004228 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004229 _Py_NewReference(*pv);
4230 sv = (PyStringObject *) *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004231 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004232 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004233 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004234 return 0;
4235}
Guido van Rossume5372401993-03-16 12:15:04 +00004236
4237/* Helpers for formatstring */
4238
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004239Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004240getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004241{
Thomas Wouters977485d2006-02-16 15:59:12 +00004242 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004243 if (argidx < arglen) {
4244 (*p_argidx)++;
4245 if (arglen < 0)
4246 return args;
4247 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004248 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004249 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004250 PyErr_SetString(PyExc_TypeError,
4251 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004252 return NULL;
4253}
4254
/* Bit flags recording which flag characters appeared in a format
 * specifier:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)

4267
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004268Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004269formatfloat(char *buf, size_t buflen, int flags,
4270 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004271{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004272 /* fmt = '%#.' + `prec` + `type`
4273 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004274 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004275 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004276 x = PyFloat_AsDouble(v);
4277 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004278 PyErr_Format(PyExc_TypeError, "float argument required, "
Christian Heimese93237d2007-12-19 02:37:44 +00004279 "not %.200s", Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004280 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004281 }
Guido van Rossume5372401993-03-16 12:15:04 +00004282 if (prec < 0)
4283 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004284 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4285 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004286 /* Worst case length calc to ensure no buffer overrun:
4287
4288 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004289 fmt = %#.<prec>g
4290 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004291 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004292 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004293
4294 'f' formats:
4295 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4296 len = 1 + 50 + 1 + prec = 52 + prec
4297
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004298 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004299 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004300
4301 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004302 if (((type == 'g' || type == 'G') &&
4303 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004304 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004305 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004306 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004307 return -1;
4308 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004309 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4310 (flags&F_ALT) ? "#" : "",
4311 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004312 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004313 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004314}
4315
Tim Peters38fd5b62000-09-21 05:43:11 +00004316/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4317 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4318 * Python's regular ints.
4319 * Return value: a new PyString*, or NULL if error.
4320 * . *pbuf is set to point into it,
4321 * *plen set to the # of chars following that.
4322 * Caller must decref it when done using pbuf.
4323 * The string starting at *pbuf is of the form
4324 * "-"? ("0x" | "0X")? digit+
4325 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004326 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004327 * There will be at least prec digits, zero-filled on the left if
4328 * necessary to get that many.
4329 * val object to be converted
4330 * flags bitmask of format flags; only F_ALT is looked at
4331 * prec minimum number of digits; 0-fill on left if needed
4332 * type a character in [duoxX]; u acts the same as d
4333 *
4334 * CAUTION: o, x and X conversions on regular ints can never
4335 * produce a '-' sign, but can for Python's unbounded ints.
4336 */
4337PyObject*
4338_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4339 char **pbuf, int *plen)
4340{
4341 PyObject *result = NULL;
4342 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004343 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004344 int sign; /* 1 if '-', else 0 */
4345 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004346 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004347 int numdigits; /* len == numnondigits + numdigits */
4348 int numnondigits = 0;
4349
4350 switch (type) {
4351 case 'd':
4352 case 'u':
Christian Heimese93237d2007-12-19 02:37:44 +00004353 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004354 break;
4355 case 'o':
Christian Heimese93237d2007-12-19 02:37:44 +00004356 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004357 break;
4358 case 'x':
4359 case 'X':
4360 numnondigits = 2;
Christian Heimese93237d2007-12-19 02:37:44 +00004361 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004362 break;
4363 default:
4364 assert(!"'type' not in [duoxX]");
4365 }
4366 if (!result)
4367 return NULL;
4368
Neal Norwitz56423e52006-08-13 18:11:08 +00004369 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004370 if (!buf) {
4371 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004372 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004373 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004374
Tim Peters38fd5b62000-09-21 05:43:11 +00004375 /* To modify the string in-place, there can only be one reference. */
Christian Heimese93237d2007-12-19 02:37:44 +00004376 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004377 PyErr_BadInternalCall();
4378 return NULL;
4379 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004380 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004381 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004382 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4383 return NULL;
4384 }
4385 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004386 if (buf[len-1] == 'L') {
4387 --len;
4388 buf[len] = '\0';
4389 }
4390 sign = buf[0] == '-';
4391 numnondigits += sign;
4392 numdigits = len - numnondigits;
4393 assert(numdigits > 0);
4394
Tim Petersfff53252001-04-12 18:38:48 +00004395 /* Get rid of base marker unless F_ALT */
4396 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004397 /* Need to skip 0x, 0X or 0. */
4398 int skipped = 0;
4399 switch (type) {
4400 case 'o':
4401 assert(buf[sign] == '0');
4402 /* If 0 is only digit, leave it alone. */
4403 if (numdigits > 1) {
4404 skipped = 1;
4405 --numdigits;
4406 }
4407 break;
4408 case 'x':
4409 case 'X':
4410 assert(buf[sign] == '0');
4411 assert(buf[sign + 1] == 'x');
4412 skipped = 2;
4413 numnondigits -= 2;
4414 break;
4415 }
4416 if (skipped) {
4417 buf += skipped;
4418 len -= skipped;
4419 if (sign)
4420 buf[0] = '-';
4421 }
4422 assert(len == numnondigits + numdigits);
4423 assert(numdigits > 0);
4424 }
4425
4426 /* Fill with leading zeroes to meet minimum width. */
4427 if (prec > numdigits) {
4428 PyObject *r1 = PyString_FromStringAndSize(NULL,
4429 numnondigits + prec);
4430 char *b1;
4431 if (!r1) {
4432 Py_DECREF(result);
4433 return NULL;
4434 }
4435 b1 = PyString_AS_STRING(r1);
4436 for (i = 0; i < numnondigits; ++i)
4437 *b1++ = *buf++;
4438 for (i = 0; i < prec - numdigits; i++)
4439 *b1++ = '0';
4440 for (i = 0; i < numdigits; i++)
4441 *b1++ = *buf++;
4442 *b1 = '\0';
4443 Py_DECREF(result);
4444 result = r1;
4445 buf = PyString_AS_STRING(result);
4446 len = numnondigits + prec;
4447 }
4448
4449 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004450 if (type == 'X') {
4451 /* Need to convert all lower case letters to upper case.
4452 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004453 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004454 if (buf[i] >= 'a' && buf[i] <= 'x')
4455 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004456 }
4457 *pbuf = buf;
4458 *plen = len;
4459 return result;
4460}
4461
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004462Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004463formatint(char *buf, size_t buflen, int flags,
4464 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004465{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004466 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004467 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4468 + 1 + 1 = 24 */
4469 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004470 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004471 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004472
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004473 x = PyInt_AsLong(v);
4474 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004475 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00004476 Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004477 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004478 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004479 if (x < 0 && type == 'u') {
4480 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004481 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004482 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4483 sign = "-";
4484 else
4485 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004486 if (prec < 0)
4487 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004488
4489 if ((flags & F_ALT) &&
4490 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004491 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004492 * of issues that cause pain:
4493 * - when 0 is being converted, the C standard leaves off
4494 * the '0x' or '0X', which is inconsistent with other
4495 * %#x/%#X conversions and inconsistent with Python's
4496 * hex() function
4497 * - there are platforms that violate the standard and
4498 * convert 0 with the '0x' or '0X'
4499 * (Metrowerks, Compaq Tru64)
4500 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004501 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004502 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004503 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004504 * We can achieve the desired consistency by inserting our
4505 * own '0x' or '0X' prefix, and substituting %x/%X in place
4506 * of %#x/%#X.
4507 *
4508 * Note that this is the same approach as used in
4509 * formatint() in unicodeobject.c
4510 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004511 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4512 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004513 }
4514 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004515 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4516 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004517 prec, type);
4518 }
4519
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004520 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4521 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004522 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004523 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004524 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004525 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004526 return -1;
4527 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004528 if (sign[0])
4529 PyOS_snprintf(buf, buflen, fmt, -x);
4530 else
4531 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004532 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004533}
4534
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004535Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004536formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004537{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004538 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004539 if (PyString_Check(v)) {
4540 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004541 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004542 }
4543 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004544 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004545 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004546 }
4547 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004548 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004549}
4550
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (for instance as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004560
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004561PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004562PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004563{
4564 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004565 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004566 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004567 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004568 PyObject *result, *orig_args;
4569#ifdef Py_USING_UNICODE
4570 PyObject *v, *w;
4571#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004572 PyObject *dict = NULL;
4573 if (format == NULL || !PyString_Check(format) || args == NULL) {
4574 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004575 return NULL;
4576 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004577 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004578 fmt = PyString_AS_STRING(format);
4579 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004580 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004581 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004582 if (result == NULL)
4583 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004584 res = PyString_AsString(result);
4585 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004586 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004587 argidx = 0;
4588 }
4589 else {
4590 arglen = -1;
4591 argidx = -2;
4592 }
Christian Heimese93237d2007-12-19 02:37:44 +00004593 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004594 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004595 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004596 while (--fmtcnt >= 0) {
4597 if (*fmt != '%') {
4598 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004599 rescnt = fmtcnt + 100;
4600 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004601 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004602 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004603 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004604 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004605 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004606 }
4607 *res++ = *fmt++;
4608 }
4609 else {
4610 /* Got a format specifier */
4611 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004612 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004613 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004614 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004615 int fill;
Facundo Batistac11cecf2008-02-24 03:17:21 +00004616 int isnumok;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004617 PyObject *v = NULL;
4618 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004619 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004620 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004621 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004622 char formatbuf[FORMATBUFLEN];
4623 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004624#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004625 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004626 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004627#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004628
Guido van Rossumda9c2711996-12-05 21:58:58 +00004629 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004630 if (*fmt == '(') {
4631 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004632 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004634 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004635
4636 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004637 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004638 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004639 goto error;
4640 }
4641 ++fmt;
4642 --fmtcnt;
4643 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004644 /* Skip over balanced parentheses */
4645 while (pcount > 0 && --fmtcnt >= 0) {
4646 if (*fmt == ')')
4647 --pcount;
4648 else if (*fmt == '(')
4649 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004650 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004651 }
4652 keylen = fmt - keystart - 1;
4653 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004654 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004655 "incomplete format key");
4656 goto error;
4657 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004658 key = PyString_FromStringAndSize(keystart,
4659 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004660 if (key == NULL)
4661 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004662 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004663 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004664 args_owned = 0;
4665 }
4666 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004667 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004668 if (args == NULL) {
4669 goto error;
4670 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004671 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004672 arglen = -1;
4673 argidx = -2;
4674 }
Guido van Rossume5372401993-03-16 12:15:04 +00004675 while (--fmtcnt >= 0) {
4676 switch (c = *fmt++) {
4677 case '-': flags |= F_LJUST; continue;
4678 case '+': flags |= F_SIGN; continue;
4679 case ' ': flags |= F_BLANK; continue;
4680 case '#': flags |= F_ALT; continue;
4681 case '0': flags |= F_ZERO; continue;
4682 }
4683 break;
4684 }
4685 if (c == '*') {
4686 v = getnextarg(args, arglen, &argidx);
4687 if (v == NULL)
4688 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004689 if (!PyInt_Check(v)) {
4690 PyErr_SetString(PyExc_TypeError,
4691 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004692 goto error;
4693 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004694 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004695 if (width < 0) {
4696 flags |= F_LJUST;
4697 width = -width;
4698 }
Guido van Rossume5372401993-03-16 12:15:04 +00004699 if (--fmtcnt >= 0)
4700 c = *fmt++;
4701 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004702 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004703 width = c - '0';
4704 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004705 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004706 if (!isdigit(c))
4707 break;
4708 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004709 PyErr_SetString(
4710 PyExc_ValueError,
4711 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004712 goto error;
4713 }
4714 width = width*10 + (c - '0');
4715 }
4716 }
4717 if (c == '.') {
4718 prec = 0;
4719 if (--fmtcnt >= 0)
4720 c = *fmt++;
4721 if (c == '*') {
4722 v = getnextarg(args, arglen, &argidx);
4723 if (v == NULL)
4724 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004725 if (!PyInt_Check(v)) {
4726 PyErr_SetString(
4727 PyExc_TypeError,
4728 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004729 goto error;
4730 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004731 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004732 if (prec < 0)
4733 prec = 0;
4734 if (--fmtcnt >= 0)
4735 c = *fmt++;
4736 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004737 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004738 prec = c - '0';
4739 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004740 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004741 if (!isdigit(c))
4742 break;
4743 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004744 PyErr_SetString(
4745 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004746 "prec too big");
4747 goto error;
4748 }
4749 prec = prec*10 + (c - '0');
4750 }
4751 }
4752 } /* prec */
4753 if (fmtcnt >= 0) {
4754 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004755 if (--fmtcnt >= 0)
4756 c = *fmt++;
4757 }
4758 }
4759 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004760 PyErr_SetString(PyExc_ValueError,
4761 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004762 goto error;
4763 }
4764 if (c != '%') {
4765 v = getnextarg(args, arglen, &argidx);
4766 if (v == NULL)
4767 goto error;
4768 }
4769 sign = 0;
4770 fill = ' ';
4771 switch (c) {
4772 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004773 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004774 len = 1;
4775 break;
4776 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004777#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004778 if (PyUnicode_Check(v)) {
4779 fmt = fmt_start;
4780 argidx = argidx_start;
4781 goto unicode;
4782 }
Georg Brandld45014b2005-10-01 17:06:00 +00004783#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004784 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004785#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004786 if (temp != NULL && PyUnicode_Check(temp)) {
4787 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004788 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004789 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004790 goto unicode;
4791 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004792#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004793 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004794 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004795 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004796 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004797 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004798 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004799 if (!PyString_Check(temp)) {
4800 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004801 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004802 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004803 goto error;
4804 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004805 pbuf = PyString_AS_STRING(temp);
4806 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004807 if (prec >= 0 && len > prec)
4808 len = prec;
4809 break;
4810 case 'i':
4811 case 'd':
4812 case 'u':
4813 case 'o':
4814 case 'x':
4815 case 'X':
4816 if (c == 'i')
4817 c = 'd';
Facundo Batistac11cecf2008-02-24 03:17:21 +00004818 isnumok = 0;
4819 if (PyNumber_Check(v)) {
4820 PyObject *iobj=NULL;
4821
4822 if (PyInt_Check(v) || (PyLong_Check(v))) {
4823 iobj = v;
4824 Py_INCREF(iobj);
4825 }
4826 else {
4827 iobj = PyNumber_Int(v);
4828 if (iobj==NULL) iobj = PyNumber_Long(v);
4829 }
4830 if (iobj!=NULL) {
4831 if (PyInt_Check(iobj)) {
4832 isnumok = 1;
4833 pbuf = formatbuf;
4834 len = formatint(pbuf,
4835 sizeof(formatbuf),
4836 flags, prec, c, iobj);
4837 Py_DECREF(iobj);
4838 if (len < 0)
4839 goto error;
4840 sign = 1;
4841 }
4842 else if (PyLong_Check(iobj)) {
4843 int ilen;
4844
4845 isnumok = 1;
4846 temp = _PyString_FormatLong(iobj, flags,
4847 prec, c, &pbuf, &ilen);
4848 Py_DECREF(iobj);
4849 len = ilen;
4850 if (!temp)
4851 goto error;
4852 sign = 1;
4853 }
4854 else {
4855 Py_DECREF(iobj);
4856 }
4857 }
Guido van Rossum4acdc231997-01-29 06:00:24 +00004858 }
Facundo Batistac11cecf2008-02-24 03:17:21 +00004859 if (!isnumok) {
4860 PyErr_Format(PyExc_TypeError,
4861 "%%%c format: a number is required, "
4862 "not %.200s", c, Py_TYPE(v)->tp_name);
4863 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004864 }
4865 if (flags & F_ZERO)
4866 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004867 break;
4868 case 'e':
4869 case 'E':
4870 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004871 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004872 case 'g':
4873 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004874 if (c == 'F')
4875 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004876 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004877 len = formatfloat(pbuf, sizeof(formatbuf),
4878 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004879 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004880 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004881 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004882 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004883 fill = '0';
4884 break;
4885 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004886#ifdef Py_USING_UNICODE
4887 if (PyUnicode_Check(v)) {
4888 fmt = fmt_start;
4889 argidx = argidx_start;
4890 goto unicode;
4891 }
4892#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004893 pbuf = formatbuf;
4894 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004895 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004896 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004897 break;
4898 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004899 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004900 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004901 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004902 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004903 (Py_ssize_t)(fmt - 1 -
4904 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004905 goto error;
4906 }
4907 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004908 if (*pbuf == '-' || *pbuf == '+') {
4909 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004910 len--;
4911 }
4912 else if (flags & F_SIGN)
4913 sign = '+';
4914 else if (flags & F_BLANK)
4915 sign = ' ';
4916 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004917 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004918 }
4919 if (width < len)
4920 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004921 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004922 reslen -= rescnt;
4923 rescnt = width + fmtcnt + 100;
4924 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004925 if (reslen < 0) {
4926 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004927 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004928 return PyErr_NoMemory();
4929 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004930 if (_PyString_Resize(&result, reslen) < 0) {
4931 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004932 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004933 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004934 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004935 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004936 }
4937 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004938 if (fill != ' ')
4939 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004940 rescnt--;
4941 if (width > len)
4942 width--;
4943 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004944 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4945 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004946 assert(pbuf[1] == c);
4947 if (fill != ' ') {
4948 *res++ = *pbuf++;
4949 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004950 }
Tim Petersfff53252001-04-12 18:38:48 +00004951 rescnt -= 2;
4952 width -= 2;
4953 if (width < 0)
4954 width = 0;
4955 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004956 }
4957 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004958 do {
4959 --rescnt;
4960 *res++ = fill;
4961 } while (--width > len);
4962 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004963 if (fill == ' ') {
4964 if (sign)
4965 *res++ = sign;
4966 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004967 (c == 'x' || c == 'X')) {
4968 assert(pbuf[0] == '0');
4969 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004970 *res++ = *pbuf++;
4971 *res++ = *pbuf++;
4972 }
4973 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004974 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004975 res += len;
4976 rescnt -= len;
4977 while (--width >= len) {
4978 --rescnt;
4979 *res++ = ' ';
4980 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004981 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004982 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004983 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004984 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004985 goto error;
4986 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004987 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004988 } /* '%' */
4989 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004990 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004991 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004992 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004993 goto error;
4994 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004995 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004996 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004997 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004998 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004999 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00005000
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005001#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00005002 unicode:
5003 if (args_owned) {
5004 Py_DECREF(args);
5005 args_owned = 0;
5006 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00005007 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00005008 if (PyTuple_Check(orig_args) && argidx > 0) {
5009 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00005010 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00005011 v = PyTuple_New(n);
5012 if (v == NULL)
5013 goto error;
5014 while (--n >= 0) {
5015 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5016 Py_INCREF(w);
5017 PyTuple_SET_ITEM(v, n, w);
5018 }
5019 args = v;
5020 } else {
5021 Py_INCREF(orig_args);
5022 args = orig_args;
5023 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005024 args_owned = 1;
5025 /* Take what we have of the result and let the Unicode formatting
5026 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00005027 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005028 if (_PyString_Resize(&result, rescnt))
5029 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00005030 fmtcnt = PyString_GET_SIZE(format) - \
5031 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005032 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5033 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00005034 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005035 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00005036 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005037 if (v == NULL)
5038 goto error;
5039 /* Paste what we have (result) to what the Unicode formatting
5040 function returned (v) and return the result (or error) */
5041 w = PyUnicode_Concat(result, v);
5042 Py_DECREF(result);
5043 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005044 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005045 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005046#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005047
Guido van Rossume5372401993-03-16 12:15:04 +00005048 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005049 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005050 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005051 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005052 }
Guido van Rossume5372401993-03-16 12:15:04 +00005053 return NULL;
5054}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005055
Guido van Rossum2a61e741997-01-18 07:55:05 +00005056void
Fred Drakeba096332000-07-09 07:04:36 +00005057PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005058{
5059 register PyStringObject *s = (PyStringObject *)(*p);
5060 PyObject *t;
5061 if (s == NULL || !PyString_Check(s))
5062 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005063 /* If it's a string subclass, we don't really know what putting
5064 it in the interned dict might do. */
5065 if (!PyString_CheckExact(s))
5066 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005067 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005068 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005069 if (interned == NULL) {
5070 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005071 if (interned == NULL) {
5072 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005073 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005074 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005075 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005076 t = PyDict_GetItem(interned, (PyObject *)s);
5077 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005078 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005079 Py_DECREF(*p);
5080 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005081 return;
5082 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005083
Armin Rigo79f7ad22004-08-07 19:27:39 +00005084 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005085 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005086 return;
5087 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005088 /* The two references in interned are not counted by refcnt.
5089 The string deallocator will take care of this */
Christian Heimese93237d2007-12-19 02:37:44 +00005090 Py_REFCNT(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005091 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005092}
5093
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005094void
5095PyString_InternImmortal(PyObject **p)
5096{
5097 PyString_InternInPlace(p);
5098 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5099 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5100 Py_INCREF(*p);
5101 }
5102}
5103
Guido van Rossum2a61e741997-01-18 07:55:05 +00005104
5105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005106PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005107{
5108 PyObject *s = PyString_FromString(cp);
5109 if (s == NULL)
5110 return NULL;
5111 PyString_InternInPlace(&s);
5112 return s;
5113}
5114
Guido van Rossum8cf04761997-08-02 02:57:45 +00005115void
Fred Drakeba096332000-07-09 07:04:36 +00005116PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005117{
5118 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005119 for (i = 0; i < UCHAR_MAX + 1; i++) {
5120 Py_XDECREF(characters[i]);
5121 characters[i] = NULL;
5122 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005123 Py_XDECREF(nullstring);
5124 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005125}
Barry Warsawa903ad982001-02-23 16:40:48 +00005126
Barry Warsawa903ad982001-02-23 16:40:48 +00005127void _Py_ReleaseInternedStrings(void)
5128{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005129 PyObject *keys;
5130 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005131 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005132 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005133
5134 if (interned == NULL || !PyDict_Check(interned))
5135 return;
5136 keys = PyDict_Keys(interned);
5137 if (keys == NULL || !PyList_Check(keys)) {
5138 PyErr_Clear();
5139 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005140 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005141
5142 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5143 detector, interned strings are not forcibly deallocated; rather, we
5144 give them their stolen references back, and then clear and DECREF
5145 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005146
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005147 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005148 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5149 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005150 for (i = 0; i < n; i++) {
5151 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5152 switch (s->ob_sstate) {
5153 case SSTATE_NOT_INTERNED:
5154 /* XXX Shouldn't happen */
5155 break;
5156 case SSTATE_INTERNED_IMMORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005157 Py_REFCNT(s) += 1;
5158 immortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005159 break;
5160 case SSTATE_INTERNED_MORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005161 Py_REFCNT(s) += 2;
5162 mortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005163 break;
5164 default:
5165 Py_FatalError("Inconsistent interned string state.");
5166 }
5167 s->ob_sstate = SSTATE_NOT_INTERNED;
5168 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005169 fprintf(stderr, "total size of all interned strings: "
5170 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5171 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005172 Py_DECREF(keys);
5173 PyDict_Clear(interned);
5174 Py_DECREF(interned);
5175 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005176}