blob: 4c36e4bee8c05674c2d5151f2fb308204a6f001a [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Eric Smitha9f7d622008-02-17 19:46:49 +00007#include "formatter_string.h"
8
Guido van Rossum013142a1994-08-30 08:19:36 +00009#include <ctype.h>
10
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000011#ifdef COUNT_ALLOCS
12int null_strings, one_strings;
13#endif
14
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
26static PyObject *interned;
27
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000055PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Tim Peters9e897f42001-05-09 07:37:07 +000057 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000058 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000077 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000078 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000080 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000082 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000084 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000086 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000088 PyObject *t = (PyObject *)op;
89 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000090 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000094 PyObject *t = (PyObject *)op;
95 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000096 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000101}
102
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000103PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000104PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105{
Tim Peters62de65b2001-12-06 20:29:32 +0000106 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000107 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000108
109 assert(str != NULL);
110 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000111 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 PyErr_SetString(PyExc_OverflowError,
113 "string is too long for a Python string");
114 return NULL;
115 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 if (size == 0 && (op = nullstring) != NULL) {
117#ifdef COUNT_ALLOCS
118 null_strings++;
119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 Py_INCREF(op);
121 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000122 }
123 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
124#ifdef COUNT_ALLOCS
125 one_strings++;
126#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000127 Py_INCREF(op);
128 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000129 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000130
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000131 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000132 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000133 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000135 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000137 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000138 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000139 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000141 PyObject *t = (PyObject *)op;
142 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000143 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000145 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000147 PyObject *t = (PyObject *)op;
148 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000149 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000154}
155
Barry Warsawdadace02001-08-24 18:32:06 +0000156PyObject *
157PyString_FromFormatV(const char *format, va_list vargs)
158{
Tim Petersc15c4f12001-10-02 21:32:07 +0000159 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000160 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000161 const char* f;
162 char *s;
163 PyObject* string;
164
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000166 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#ifdef __va_copy
169 __va_copy(count, vargs);
170#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000171 count = vargs;
172#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000173#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000174 /* step 1: figure out how large a buffer we need */
175 for (f = format; *f; f++) {
176 if (*f == '%') {
177 const char* p = f;
178 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
179 ;
180
Tim Peters8931ff12006-05-13 23:28:20 +0000181 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
182 * they don't affect the amount of space we reserve.
183 */
184 if ((*f == 'l' || *f == 'z') &&
185 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000186 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000187
Barry Warsawdadace02001-08-24 18:32:06 +0000188 switch (*f) {
189 case 'c':
190 (void)va_arg(count, int);
191 /* fall through... */
192 case '%':
193 n++;
194 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000195 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000196 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000197 /* 20 bytes is enough to hold a 64-bit
198 integer. Decimal takes the most space.
199 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 n += 20;
201 break;
202 case 's':
203 s = va_arg(count, char*);
204 n += strlen(s);
205 break;
206 case 'p':
207 (void) va_arg(count, int);
208 /* maximum 64-bit pointer representation:
209 * 0xffffffffffffffff
210 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000211 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000212 */
213 n += 19;
214 break;
215 default:
216 /* if we stumble upon an unknown
217 formatting code, copy the rest of
218 the format string to the output
219 string. (we cannot just skip the
220 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000221 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000222 n += strlen(p);
223 goto expand;
224 }
225 } else
226 n++;
227 }
228 expand:
229 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000230 /* Since we've analyzed how much space we need for the worst case,
231 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 string = PyString_FromStringAndSize(NULL, n);
233 if (!string)
234 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000235
Barry Warsawdadace02001-08-24 18:32:06 +0000236 s = PyString_AsString(string);
237
238 for (f = format; *f; f++) {
239 if (*f == '%') {
240 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 Py_ssize_t i;
242 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000243 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000257 /* handle the long flag, but only for %ld and %lu.
258 others can be added when necessary. */
259 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000260 longflag = 1;
261 ++f;
262 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000264 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000265 size_tflag = 1;
266 ++f;
267 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000268
Barry Warsawdadace02001-08-24 18:32:06 +0000269 switch (*f) {
270 case 'c':
271 *s++ = va_arg(vargs, int);
272 break;
273 case 'd':
274 if (longflag)
275 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000276 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000277 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
278 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000279 else
280 sprintf(s, "%d", va_arg(vargs, int));
281 s += strlen(s);
282 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000283 case 'u':
284 if (longflag)
285 sprintf(s, "%lu",
286 va_arg(vargs, unsigned long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
289 va_arg(vargs, size_t));
290 else
291 sprintf(s, "%u",
292 va_arg(vargs, unsigned int));
293 s += strlen(s);
294 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000295 case 'i':
296 sprintf(s, "%i", va_arg(vargs, int));
297 s += strlen(s);
298 break;
299 case 'x':
300 sprintf(s, "%x", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 's':
304 p = va_arg(vargs, char*);
305 i = strlen(p);
306 if (n > 0 && i > n)
307 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000308 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000309 s += i;
310 break;
311 case 'p':
312 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000313 /* %p is ill-defined: ensure leading 0x. */
314 if (s[1] == 'X')
315 s[1] = 'x';
316 else if (s[1] != 'x') {
317 memmove(s+2, s, strlen(s)+1);
318 s[0] = '0';
319 s[1] = 'x';
320 }
Barry Warsawdadace02001-08-24 18:32:06 +0000321 s += strlen(s);
322 break;
323 case '%':
324 *s++ = '%';
325 break;
326 default:
327 strcpy(s, p);
328 s += strlen(s);
329 goto end;
330 }
331 } else
332 *s++ = *f;
333 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000336 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000337 return string;
338}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339
Barry Warsawdadace02001-08-24 18:32:06 +0000340PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000341PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000342{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000343 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000344 va_list vargs;
345
346#ifdef HAVE_STDARG_PROTOTYPES
347 va_start(vargs, format);
348#else
349 va_start(vargs);
350#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000351 ret = PyString_FromFormatV(format, vargs);
352 va_end(vargs);
353 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000354}
355
356
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000358 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 const char *encoding,
360 const char *errors)
361{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000362 PyObject *v, *str;
363
364 str = PyString_FromStringAndSize(s, size);
365 if (str == NULL)
366 return NULL;
367 v = PyString_AsDecodedString(str, encoding, errors);
368 Py_DECREF(str);
369 return v;
370}
371
372PyObject *PyString_AsDecodedObject(PyObject *str,
373 const char *encoding,
374 const char *errors)
375{
376 PyObject *v;
377
378 if (!PyString_Check(str)) {
379 PyErr_BadArgument();
380 goto onError;
381 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000382
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 if (encoding == NULL) {
384#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000385 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000386#else
387 PyErr_SetString(PyExc_ValueError, "no encoding specified");
388 goto onError;
389#endif
390 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000391
392 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 v = PyCodec_Decode(str, encoding, errors);
394 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000396
397 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000398
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000400 return NULL;
401}
402
403PyObject *PyString_AsDecodedString(PyObject *str,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v;
408
409 v = PyString_AsDecodedObject(str, encoding, errors);
410 if (v == NULL)
411 goto onError;
412
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000413#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000414 /* Convert Unicode to a string using the default encoding */
415 if (PyUnicode_Check(v)) {
416 PyObject *temp = v;
417 v = PyUnicode_AsEncodedString(v, NULL, NULL);
418 Py_DECREF(temp);
419 if (v == NULL)
420 goto onError;
421 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000423 if (!PyString_Check(v)) {
424 PyErr_Format(PyExc_TypeError,
425 "decoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000426 Py_TYPE(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427 Py_DECREF(v);
428 goto onError;
429 }
430
431 return v;
432
433 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 return NULL;
435}
436
437PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000438 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000439 const char *encoding,
440 const char *errors)
441{
442 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000443
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 str = PyString_FromStringAndSize(s, size);
445 if (str == NULL)
446 return NULL;
447 v = PyString_AsEncodedString(str, encoding, errors);
448 Py_DECREF(str);
449 return v;
450}
451
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000452PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 const char *encoding,
454 const char *errors)
455{
456 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000457
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000458 if (!PyString_Check(str)) {
459 PyErr_BadArgument();
460 goto onError;
461 }
462
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000463 if (encoding == NULL) {
464#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000465 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000466#else
467 PyErr_SetString(PyExc_ValueError, "no encoding specified");
468 goto onError;
469#endif
470 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000471
472 /* Encode via the codec registry */
473 v = PyCodec_Encode(str, encoding, errors);
474 if (v == NULL)
475 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476
477 return v;
478
479 onError:
480 return NULL;
481}
482
483PyObject *PyString_AsEncodedString(PyObject *str,
484 const char *encoding,
485 const char *errors)
486{
487 PyObject *v;
488
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000489 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490 if (v == NULL)
491 goto onError;
492
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000493#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000494 /* Convert Unicode to a string using the default encoding */
495 if (PyUnicode_Check(v)) {
496 PyObject *temp = v;
497 v = PyUnicode_AsEncodedString(v, NULL, NULL);
498 Py_DECREF(temp);
499 if (v == NULL)
500 goto onError;
501 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000502#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000503 if (!PyString_Check(v)) {
504 PyErr_Format(PyExc_TypeError,
505 "encoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000506 Py_TYPE(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000507 Py_DECREF(v);
508 goto onError;
509 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000512
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 onError:
514 return NULL;
515}
516
Guido van Rossum234f9421993-06-17 12:35:49 +0000517static void
Fred Drakeba096332000-07-09 07:04:36 +0000518string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000520 switch (PyString_CHECK_INTERNED(op)) {
521 case SSTATE_NOT_INTERNED:
522 break;
523
524 case SSTATE_INTERNED_MORTAL:
525 /* revive dead object temporarily for DelItem */
Christian Heimese93237d2007-12-19 02:37:44 +0000526 Py_REFCNT(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000527 if (PyDict_DelItem(interned, op) != 0)
528 Py_FatalError(
529 "deletion of interned string failed");
530 break;
531
532 case SSTATE_INTERNED_IMMORTAL:
533 Py_FatalError("Immortal interned string died.");
534
535 default:
536 Py_FatalError("Inconsistent interned string state.");
537 }
Christian Heimese93237d2007-12-19 02:37:44 +0000538 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000539}
540
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000541/* Unescape a backslash-escaped string. If unicode is non-zero,
542 the string is a u-literal. If recode_encoding is non-zero,
543 the string is UTF-8 encoded and should be re-encoded in the
544 specified encoding. */
545
546PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000549 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000550 const char *recode_encoding)
551{
552 int c;
553 char *p, *buf;
554 const char *end;
555 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000556 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000557 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558 if (v == NULL)
559 return NULL;
560 p = buf = PyString_AsString(v);
561 end = s + len;
562 while (s < end) {
563 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000564 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000565#ifdef Py_USING_UNICODE
566 if (recode_encoding && (*s & 0x80)) {
567 PyObject *u, *w;
568 char *r;
569 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000570 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 t = s;
572 /* Decode non-ASCII bytes as UTF-8. */
573 while (t < end && (*t & 0x80)) t++;
574 u = PyUnicode_DecodeUTF8(s, t - s, errors);
575 if(!u) goto failed;
576
577 /* Recode them in target encoding. */
578 w = PyUnicode_AsEncodedString(
579 u, recode_encoding, errors);
580 Py_DECREF(u);
581 if (!w) goto failed;
582
583 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000584 assert(PyString_Check(w));
585 r = PyString_AS_STRING(w);
586 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000587 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000588 p += rn;
589 Py_DECREF(w);
590 s = t;
591 } else {
592 *p++ = *s++;
593 }
594#else
595 *p++ = *s++;
596#endif
597 continue;
598 }
599 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000600 if (s==end) {
601 PyErr_SetString(PyExc_ValueError,
602 "Trailing \\ in string");
603 goto failed;
604 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000605 switch (*s++) {
606 /* XXX This assumes ASCII! */
607 case '\n': break;
608 case '\\': *p++ = '\\'; break;
609 case '\'': *p++ = '\''; break;
610 case '\"': *p++ = '\"'; break;
611 case 'b': *p++ = '\b'; break;
612 case 'f': *p++ = '\014'; break; /* FF */
613 case 't': *p++ = '\t'; break;
614 case 'n': *p++ = '\n'; break;
615 case 'r': *p++ = '\r'; break;
616 case 'v': *p++ = '\013'; break; /* VT */
617 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
618 case '0': case '1': case '2': case '3':
619 case '4': case '5': case '6': case '7':
620 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000621 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000623 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000624 c = (c<<3) + *s++ - '0';
625 }
626 *p++ = c;
627 break;
628 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000629 if (s+1 < end &&
630 isxdigit(Py_CHARMASK(s[0])) &&
631 isxdigit(Py_CHARMASK(s[1])))
632 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000633 unsigned int x = 0;
634 c = Py_CHARMASK(*s);
635 s++;
636 if (isdigit(c))
637 x = c - '0';
638 else if (islower(c))
639 x = 10 + c - 'a';
640 else
641 x = 10 + c - 'A';
642 x = x << 4;
643 c = Py_CHARMASK(*s);
644 s++;
645 if (isdigit(c))
646 x += c - '0';
647 else if (islower(c))
648 x += 10 + c - 'a';
649 else
650 x += 10 + c - 'A';
651 *p++ = x;
652 break;
653 }
654 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000655 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000657 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000658 }
659 if (strcmp(errors, "replace") == 0) {
660 *p++ = '?';
661 } else if (strcmp(errors, "ignore") == 0)
662 /* do nothing */;
663 else {
664 PyErr_Format(PyExc_ValueError,
665 "decoding error; "
666 "unknown error handling code: %.400s",
667 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000668 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000669 }
670#ifndef Py_USING_UNICODE
671 case 'u':
672 case 'U':
673 case 'N':
674 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000675 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 "Unicode escapes not legal "
677 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000678 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000679 }
680#endif
681 default:
682 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000683 s--;
684 goto non_esc; /* an arbitry number of unescaped
685 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 }
687 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000688 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000689 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000690 return v;
691 failed:
692 Py_DECREF(v);
693 return NULL;
694}
695
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000696/* -------------------------------------------------------------------- */
697/* object api */
698
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700string_getsize(register PyObject *op)
701{
702 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (PyString_AsStringAndSize(op, &s, &len))
705 return -1;
706 return len;
707}
708
709static /*const*/ char *
710string_getbuffer(register PyObject *op)
711{
712 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000714 if (PyString_AsStringAndSize(op, &s, &len))
715 return NULL;
716 return s;
717}
718
Martin v. Löwis18e16552006-02-15 17:27:45 +0000719Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000720PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000722 if (!PyString_Check(op))
723 return string_getsize(op);
Christian Heimese93237d2007-12-19 02:37:44 +0000724 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725}
726
727/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000728PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 if (!PyString_Check(op))
731 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000732 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000733}
734
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735int
736PyString_AsStringAndSize(register PyObject *obj,
737 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000738 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000739{
740 if (s == NULL) {
741 PyErr_BadInternalCall();
742 return -1;
743 }
744
745 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000746#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000747 if (PyUnicode_Check(obj)) {
748 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
749 if (obj == NULL)
750 return -1;
751 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000752 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000753#endif
754 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000755 PyErr_Format(PyExc_TypeError,
756 "expected string or Unicode object, "
Christian Heimese93237d2007-12-19 02:37:44 +0000757 "%.200s found", Py_TYPE(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000758 return -1;
759 }
760 }
761
762 *s = PyString_AS_STRING(obj);
763 if (len != NULL)
764 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000765 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000766 PyErr_SetString(PyExc_TypeError,
767 "expected string without null bytes");
768 return -1;
769 }
770 return 0;
771}
772
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000774/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000775
Eric Smitha9f7d622008-02-17 19:46:49 +0000776#include "stringlib/stringdefs.h"
Fredrik Lundha50d2012006-05-26 17:04:58 +0000777#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000778
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000779#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000780#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000781#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000784static int
Fred Drakeba096332000-07-09 07:04:36 +0000785string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786{
Brett Cannon01531592007-09-17 03:28:34 +0000787 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000789 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000790
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000791 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000792 if (! PyString_CheckExact(op)) {
793 int ret;
794 /* A str subclass may have its own __str__ method. */
795 op = (PyStringObject *) PyObject_Str((PyObject *)op);
796 if (op == NULL)
797 return -1;
798 ret = string_print(op, fp, flags);
799 Py_DECREF(op);
800 return ret;
801 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000802 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000803 char *data = op->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +0000804 Py_ssize_t size = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000805 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000806 while (size > INT_MAX) {
807 /* Very long strings cannot be written atomically.
808 * But don't write exactly INT_MAX bytes at a time
809 * to avoid memory aligment issues.
810 */
811 const int chunk_size = INT_MAX & ~0x3FFF;
812 fwrite(data, 1, chunk_size, fp);
813 data += chunk_size;
814 size -= chunk_size;
815 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000816#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000817 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000818#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000819 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000820#endif
Brett Cannon01531592007-09-17 03:28:34 +0000821 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000822 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824
Thomas Wouters7e474022000-07-16 12:04:32 +0000825 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826 quote = '\'';
Christian Heimese93237d2007-12-19 02:37:44 +0000827 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
828 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 quote = '"';
830
Christian Heimese93237d2007-12-19 02:37:44 +0000831 str_len = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000832 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000834 for (i = 0; i < str_len; i++) {
835 /* Since strings are immutable and the caller should have a
836 reference, accessing the interal buffer should not be an issue
837 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000840 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000841 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000842 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000843 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000845 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\r");
847 else if (c < ' ' || c >= 0x7f)
848 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000849 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000852 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000853 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000854 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855}
856
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000857PyObject *
858PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000859{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000860 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimese93237d2007-12-19 02:37:44 +0000861 size_t newsize = 2 + 4 * Py_SIZE(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000862 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +0000863 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000864 PyErr_SetString(PyExc_OverflowError,
865 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000866 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000867 }
868 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000870 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 }
872 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000873 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 register char c;
875 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000876 int quote;
877
Thomas Wouters7e474022000-07-16 12:04:32 +0000878 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000879 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000880 if (smartquotes &&
Christian Heimese93237d2007-12-19 02:37:44 +0000881 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
882 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000883 quote = '"';
884
Tim Peters9161c8b2001-12-03 01:55:38 +0000885 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000886 *p++ = quote;
Christian Heimese93237d2007-12-19 02:37:44 +0000887 for (i = 0; i < Py_SIZE(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000888 /* There's at least enough room for a hex escape
889 and a closing quote. */
890 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000892 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000894 else if (c == '\t')
895 *p++ = '\\', *p++ = 't';
896 else if (c == '\n')
897 *p++ = '\\', *p++ = 'n';
898 else if (c == '\r')
899 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000900 else if (c < ' ' || c >= 0x7f) {
901 /* For performance, we don't want to call
902 PyOS_snprintf here (extra layers of
903 function call). */
904 sprintf(p, "\\x%02x", c & 0xff);
905 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000906 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000907 else
908 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000910 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000911 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000914 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000915 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917}
918
Guido van Rossum189f1df2001-05-01 16:51:53 +0000919static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000920string_repr(PyObject *op)
921{
922 return PyString_Repr(op, 1);
923}
924
925static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000926string_str(PyObject *s)
927{
Tim Petersc9933152001-10-16 20:18:24 +0000928 assert(PyString_Check(s));
929 if (PyString_CheckExact(s)) {
930 Py_INCREF(s);
931 return s;
932 }
933 else {
934 /* Subtype -- return genuine string with the same value. */
935 PyStringObject *t = (PyStringObject *) s;
Christian Heimese93237d2007-12-19 02:37:44 +0000936 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Tim Petersc9933152001-10-16 20:18:24 +0000937 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000938}
939
Martin v. Löwis18e16552006-02-15 17:27:45 +0000940static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000941string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942{
Christian Heimese93237d2007-12-19 02:37:44 +0000943 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000944}
945
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000947string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948{
Andrew Dalke598710c2006-05-25 18:18:39 +0000949 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950 register PyStringObject *op;
951 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000952#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000953 if (PyUnicode_Check(bb))
954 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000955#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000956 if (PyBytes_Check(bb))
957 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000958 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000959 "cannot concatenate 'str' and '%.200s' objects",
Christian Heimese93237d2007-12-19 02:37:44 +0000960 Py_TYPE(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000961 return NULL;
962 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000963#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 /* Optimize cases with empty left or right operand */
Christian Heimese93237d2007-12-19 02:37:44 +0000965 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000966 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimese93237d2007-12-19 02:37:44 +0000967 if (Py_SIZE(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000968 Py_INCREF(bb);
969 return bb;
970 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971 Py_INCREF(a);
972 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973 }
Christian Heimese93237d2007-12-19 02:37:44 +0000974 size = Py_SIZE(a) + Py_SIZE(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000975 if (size < 0) {
976 PyErr_SetString(PyExc_OverflowError,
977 "strings are too large to concat");
978 return NULL;
979 }
980
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000981 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000982 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000983 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000985 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000986 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000987 op->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimese93237d2007-12-19 02:37:44 +0000988 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
989 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000990 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992#undef b
993}
994
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000996string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000997{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000998 register Py_ssize_t i;
999 register Py_ssize_t j;
1000 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001001 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001002 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 if (n < 0)
1004 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001005 /* watch out for overflows: the size can overflow int,
1006 * and the # of bytes needed can overflow size_t
1007 */
Christian Heimese93237d2007-12-19 02:37:44 +00001008 size = Py_SIZE(a) * n;
1009 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001010 PyErr_SetString(PyExc_OverflowError,
1011 "repeated string is too long");
1012 return NULL;
1013 }
Christian Heimese93237d2007-12-19 02:37:44 +00001014 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001015 Py_INCREF(a);
1016 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001017 }
Tim Peterse7c05322004-06-27 17:24:49 +00001018 nbytes = (size_t)size;
1019 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001020 PyErr_SetString(PyExc_OverflowError,
1021 "repeated string is too long");
1022 return NULL;
1023 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001025 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001026 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001028 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001029 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001030 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001031 op->ob_sval[size] = '\0';
Christian Heimese93237d2007-12-19 02:37:44 +00001032 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001033 memset(op->ob_sval, a->ob_sval[0] , n);
1034 return (PyObject *) op;
1035 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001036 i = 0;
1037 if (i < size) {
Christian Heimese93237d2007-12-19 02:37:44 +00001038 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1039 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 }
1041 while (i < size) {
1042 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001043 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 i += j;
1045 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047}
1048
1049/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1050
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001051static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001052string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001053 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001054 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001055{
1056 if (i < 0)
1057 i = 0;
1058 if (j < 0)
1059 j = 0; /* Avoid signed/unsigned bug in next line */
Christian Heimese93237d2007-12-19 02:37:44 +00001060 if (j > Py_SIZE(a))
1061 j = Py_SIZE(a);
1062 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001063 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001064 Py_INCREF(a);
1065 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001066 }
1067 if (j < i)
1068 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001069 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070}
1071
Guido van Rossum9284a572000-03-07 15:53:43 +00001072static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001074{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001075 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001076#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001077 if (PyUnicode_Check(sub_obj))
1078 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001079#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001080 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001081 PyErr_Format(PyExc_TypeError,
1082 "'in <string>' requires string as left operand, "
Christian Heimese93237d2007-12-19 02:37:44 +00001083 "not %.200s", Py_TYPE(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001084 return -1;
1085 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001086 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001087
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001088 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001089}
1090
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001091static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001092string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001094 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +00001096 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001097 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001098 return NULL;
1099 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001100 pchar = a->ob_sval[i];
1101 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001102 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001103 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001104 else {
1105#ifdef COUNT_ALLOCS
1106 one_strings++;
1107#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001108 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001109 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001110 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001111}
1112
Martin v. Löwiscd353062001-05-24 16:56:35 +00001113static PyObject*
1114string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001117 Py_ssize_t len_a, len_b;
1118 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001119 PyObject *result;
1120
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001121 /* Make sure both arguments are strings. */
1122 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 result = Py_NotImplemented;
1124 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001125 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001126 if (a == b) {
1127 switch (op) {
1128 case Py_EQ:case Py_LE:case Py_GE:
1129 result = Py_True;
1130 goto out;
1131 case Py_NE:case Py_LT:case Py_GT:
1132 result = Py_False;
1133 goto out;
1134 }
1135 }
1136 if (op == Py_EQ) {
1137 /* Supporting Py_NE here as well does not save
1138 much time, since Py_NE is rarely used. */
Christian Heimese93237d2007-12-19 02:37:44 +00001139 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001140 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimese93237d2007-12-19 02:37:44 +00001141 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001142 result = Py_True;
1143 } else {
1144 result = Py_False;
1145 }
1146 goto out;
1147 }
Christian Heimese93237d2007-12-19 02:37:44 +00001148 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001149 min_len = (len_a < len_b) ? len_a : len_b;
1150 if (min_len > 0) {
1151 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1152 if (c==0)
1153 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001154 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001155 c = 0;
1156 if (c == 0)
1157 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1158 switch (op) {
1159 case Py_LT: c = c < 0; break;
1160 case Py_LE: c = c <= 0; break;
1161 case Py_EQ: assert(0); break; /* unreachable */
1162 case Py_NE: c = c != 0; break;
1163 case Py_GT: c = c > 0; break;
1164 case Py_GE: c = c >= 0; break;
1165 default:
1166 result = Py_NotImplemented;
1167 goto out;
1168 }
1169 result = c ? Py_True : Py_False;
1170 out:
1171 Py_INCREF(result);
1172 return result;
1173}
1174
1175int
1176_PyString_Eq(PyObject *o1, PyObject *o2)
1177{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001178 PyStringObject *a = (PyStringObject*) o1;
1179 PyStringObject *b = (PyStringObject*) o2;
Christian Heimese93237d2007-12-19 02:37:44 +00001180 return Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001181 && *a->ob_sval == *b->ob_sval
Christian Heimese93237d2007-12-19 02:37:44 +00001182 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001183}
1184
Guido van Rossum9bfef441993-03-29 10:43:31 +00001185static long
Fred Drakeba096332000-07-09 07:04:36 +00001186string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001187{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001188 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001189 register unsigned char *p;
1190 register long x;
1191
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001192 if (a->ob_shash != -1)
1193 return a->ob_shash;
Christian Heimese93237d2007-12-19 02:37:44 +00001194 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001195 p = (unsigned char *) a->ob_sval;
1196 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001197 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001198 x = (1000003*x) ^ *p++;
Christian Heimese93237d2007-12-19 02:37:44 +00001199 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001200 if (x == -1)
1201 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001202 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001203 return x;
1204}
1205
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206static PyObject*
1207string_subscript(PyStringObject* self, PyObject* item)
1208{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001209 if (PyIndex_Check(item)) {
1210 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001211 if (i == -1 && PyErr_Occurred())
1212 return NULL;
1213 if (i < 0)
1214 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001215 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001216 }
1217 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001218 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 char* source_buf;
1220 char* result_buf;
1221 PyObject* result;
1222
Tim Petersae1d0c92006-03-17 03:29:34 +00001223 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224 PyString_GET_SIZE(self),
1225 &start, &stop, &step, &slicelength) < 0) {
1226 return NULL;
1227 }
1228
1229 if (slicelength <= 0) {
1230 return PyString_FromStringAndSize("", 0);
1231 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001232 else if (start == 0 && step == 1 &&
1233 slicelength == PyString_GET_SIZE(self) &&
1234 PyString_CheckExact(self)) {
1235 Py_INCREF(self);
1236 return (PyObject *)self;
1237 }
1238 else if (step == 1) {
1239 return PyString_FromStringAndSize(
1240 PyString_AS_STRING(self) + start,
1241 slicelength);
1242 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001243 else {
1244 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001245 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001246 if (result_buf == NULL)
1247 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001248
Tim Petersae1d0c92006-03-17 03:29:34 +00001249 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001250 cur += step, i++) {
1251 result_buf[i] = source_buf[cur];
1252 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001253
1254 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001255 slicelength);
1256 PyMem_Free(result_buf);
1257 return result;
1258 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001259 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001260 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001261 PyErr_Format(PyExc_TypeError,
1262 "string indices must be integers, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00001263 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 return NULL;
1265 }
1266}
1267
Martin v. Löwis18e16552006-02-15 17:27:45 +00001268static Py_ssize_t
1269string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001270{
1271 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001272 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001273 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274 return -1;
1275 }
1276 *ptr = (void *)self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001277 return Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278}
1279
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280static Py_ssize_t
1281string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282{
Guido van Rossum045e6881997-09-08 18:30:11 +00001283 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001284 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001285 return -1;
1286}
1287
Martin v. Löwis18e16552006-02-15 17:27:45 +00001288static Py_ssize_t
1289string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001290{
1291 if ( lenp )
Christian Heimese93237d2007-12-19 02:37:44 +00001292 *lenp = Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001293 return 1;
1294}
1295
Martin v. Löwis18e16552006-02-15 17:27:45 +00001296static Py_ssize_t
1297string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001298{
1299 if ( index != 0 ) {
1300 PyErr_SetString(PyExc_SystemError,
1301 "accessing non-existent string segment");
1302 return -1;
1303 }
1304 *ptr = self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001305 return Py_SIZE(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001306}
1307
Christian Heimes1a6387e2008-03-26 12:49:49 +00001308static int
1309string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1310{
1311 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1312 0, flags);
1313}
1314
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001315static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001316 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001317 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001318 (ssizeargfunc)string_repeat, /*sq_repeat*/
1319 (ssizeargfunc)string_item, /*sq_item*/
1320 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001321 0, /*sq_ass_item*/
1322 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001323 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001324};
1325
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001326static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001327 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001328 (binaryfunc)string_subscript,
1329 0,
1330};
1331
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001332static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001333 (readbufferproc)string_buffer_getreadbuf,
1334 (writebufferproc)string_buffer_getwritebuf,
1335 (segcountproc)string_buffer_getsegcount,
1336 (charbufferproc)string_buffer_getcharbuf,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001337 (getbufferproc)string_buffer_getbuffer,
1338 0, /* XXX */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001339};
1340
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341
1342
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-format strings, indexed by the selectors above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The bare method name: the format string with its 3-character "|O:"
   prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001351
Andrew Dalke525eab32006-05-26 14:00:45 +00001352
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly before memcmp() checks what lies after the first
   byte.

   Don't call if length < 2: `length - 2` would then be negative and
   turn into an enormous size_t when passed to memcmp().

   Every parameter is parenthesized so that expression arguments
   (e.g. `i + 1`) expand with the intended precedence. */
#define Py_STRING_MATCH(target, offset, pattern, length)        \
    ((target)[(offset)] == (pattern)[0] &&                      \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&  \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1358
1359
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression
   arguments expand with the intended precedence; note that it is
   evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1372
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001373#define SPLIT_APPEND(data, left, right) \
1374 str = PyString_FromStringAndSize((data) + (left), \
1375 (right) - (left)); \
1376 if (str == NULL) \
1377 goto onError; \
1378 if (PyList_Append(list, str)) { \
1379 Py_DECREF(str); \
1380 goto onError; \
1381 } \
1382 else \
1383 Py_DECREF(str);
1384
Andrew Dalke02758d62006-05-26 15:21:01 +00001385#define SPLIT_ADD(data, left, right) { \
Andrew Dalke525eab32006-05-26 14:00:45 +00001386 str = PyString_FromStringAndSize((data) + (left), \
1387 (right) - (left)); \
1388 if (str == NULL) \
1389 goto onError; \
1390 if (count < MAX_PREALLOC) { \
1391 PyList_SET_ITEM(list, count, str); \
1392 } else { \
1393 if (PyList_Append(list, str)) { \
1394 Py_DECREF(str); \
1395 goto onError; \
1396 } \
1397 else \
1398 Py_DECREF(str); \
1399 } \
Andrew Dalke02758d62006-05-26 15:21:01 +00001400 count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001401
/* Always force the list to the expected size: overwrite the size field of
   `list` with `count`, the enclosing function's running element counter.
   The expansion is parenthesized so it stays a single expression wherever
   it is used. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
Andrew Dalke525eab32006-05-26 14:00:45 +00001404
/* Index scanners used by the whitespace split routines.  `i` must be a
   modifiable lvalue; it is advanced in place.

   SKIP_SPACE / SKIP_NONSPACE move `i` forward while it is below `len` and
   s[i] is (respectively is not) whitespace according to isspace().
   RSKIP_SPACE / RSKIP_NONSPACE move `i` backward while it is >= 0 under the
   same tests.  Arguments are parenthesized so expression arguments bind as
   written. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1409
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001410Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001411split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412{
Skip Montanaro26015492007-12-08 15:33:24 +00001413 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001414 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001415 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001416 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417
1418 if (list == NULL)
1419 return NULL;
1420
Andrew Dalke02758d62006-05-26 15:21:01 +00001421 i = j = 0;
1422
1423 while (maxsplit-- > 0) {
1424 SKIP_SPACE(s, i, len);
1425 if (i==len) break;
1426 j = i; i++;
1427 SKIP_NONSPACE(s, i, len);
Skip Montanaro26015492007-12-08 15:33:24 +00001428 if (j == 0 && i == len && PyString_CheckExact(self)) {
1429 /* No whitespace in self, so just use it as list[0] */
1430 Py_INCREF(self);
1431 PyList_SET_ITEM(list, 0, (PyObject *)self);
1432 count++;
1433 break;
1434 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001435 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001437
1438 if (i < len) {
1439 /* Only occurs when maxsplit was reached */
1440 /* Skip any remaining whitespace and copy to end of string */
1441 SKIP_SPACE(s, i, len);
1442 if (i != len)
1443 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001444 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001445 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001446 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448 Py_DECREF(list);
1449 return NULL;
1450}
1451
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001452Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001453split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001454{
Skip Montanaro26015492007-12-08 15:33:24 +00001455 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001456 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001457 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001458 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001459
1460 if (list == NULL)
1461 return NULL;
1462
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001463 i = j = 0;
1464 while ((j < len) && (maxcount-- > 0)) {
1465 for(; j<len; j++) {
1466 /* I found that using memchr makes no difference */
1467 if (s[j] == ch) {
1468 SPLIT_ADD(s, i, j);
1469 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001470 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001471 }
1472 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001473 }
Skip Montanaro26015492007-12-08 15:33:24 +00001474 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1475 /* ch not in self, so just use self as list[0] */
1476 Py_INCREF(self);
1477 PyList_SET_ITEM(list, 0, (PyObject *)self);
1478 count++;
1479 }
1480 else if (i <= len) {
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001481 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001482 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001483 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001484 return list;
1485
1486 onError:
1487 Py_DECREF(list);
1488 return NULL;
1489}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001491PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492"S.split([sep [,maxsplit]]) -> list of strings\n\
1493\n\
1494Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001495delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001496splits are done. If sep is not specified or is None, any\n\
1497whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498
1499static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001500string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001502 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001503 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001504 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001505 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001506#ifdef USE_FAST
1507 Py_ssize_t pos;
1508#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509
Martin v. Löwis9c830762006-04-13 08:37:17 +00001510 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001512 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001513 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001514 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001515 return split_whitespace(self, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001516 if (PyString_Check(subobj)) {
1517 sub = PyString_AS_STRING(subobj);
1518 n = PyString_GET_SIZE(subobj);
1519 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001520#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001521 else if (PyUnicode_Check(subobj))
1522 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001523#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001524 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1525 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001526
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527 if (n == 0) {
1528 PyErr_SetString(PyExc_ValueError, "empty separator");
1529 return NULL;
1530 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001531 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001532 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533
Andrew Dalke525eab32006-05-26 14:00:45 +00001534 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001535 if (list == NULL)
1536 return NULL;
1537
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001538#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001540 while (maxsplit-- > 0) {
1541 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1542 if (pos < 0)
1543 break;
1544 j = i+pos;
1545 SPLIT_ADD(s, i, j);
1546 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001548#else
1549 i = j = 0;
1550 while ((j+n <= len) && (maxsplit-- > 0)) {
1551 for (; j+n <= len; j++) {
1552 if (Py_STRING_MATCH(s, j, sub, n)) {
1553 SPLIT_ADD(s, i, j);
1554 i = j = j + n;
1555 break;
1556 }
1557 }
1558 }
1559#endif
1560 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001561 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001562 return list;
1563
Andrew Dalke525eab32006-05-26 14:00:45 +00001564 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565 Py_DECREF(list);
1566 return NULL;
1567}
1568
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001569PyDoc_STRVAR(partition__doc__,
1570"S.partition(sep) -> (head, sep, tail)\n\
1571\n\
1572Searches for the separator sep in S, and returns the part before it,\n\
1573the separator itself, and the part after it. If the separator is not\n\
1574found, returns S and two empty strings.");
1575
1576static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001577string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001578{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001579 const char *sep;
1580 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001581
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001582 if (PyString_Check(sep_obj)) {
1583 sep = PyString_AS_STRING(sep_obj);
1584 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001585 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001586#ifdef Py_USING_UNICODE
1587 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001588 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001589#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001590 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001591 return NULL;
1592
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001593 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001594 (PyObject*) self,
1595 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1596 sep_obj, sep, sep_len
1597 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001598}
1599
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001600PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001601"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001602\n\
1603Searches for the separator sep in S, starting at the end of S, and returns\n\
1604the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001605separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001606
1607static PyObject *
1608string_rpartition(PyStringObject *self, PyObject *sep_obj)
1609{
1610 const char *sep;
1611 Py_ssize_t sep_len;
1612
1613 if (PyString_Check(sep_obj)) {
1614 sep = PyString_AS_STRING(sep_obj);
1615 sep_len = PyString_GET_SIZE(sep_obj);
1616 }
1617#ifdef Py_USING_UNICODE
1618 else if (PyUnicode_Check(sep_obj))
1619 return PyUnicode_Partition((PyObject *) self, sep_obj);
1620#endif
1621 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1622 return NULL;
1623
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001624 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001625 (PyObject*) self,
1626 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1627 sep_obj, sep, sep_len
1628 );
1629}
1630
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001631Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001632rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001633{
Skip Montanaro26015492007-12-08 15:33:24 +00001634 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001635 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001636 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001637 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001638
1639 if (list == NULL)
1640 return NULL;
1641
Andrew Dalke02758d62006-05-26 15:21:01 +00001642 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001643
Andrew Dalke02758d62006-05-26 15:21:01 +00001644 while (maxsplit-- > 0) {
1645 RSKIP_SPACE(s, i);
1646 if (i<0) break;
1647 j = i; i--;
1648 RSKIP_NONSPACE(s, i);
Skip Montanaro26015492007-12-08 15:33:24 +00001649 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1650 /* No whitespace in self, so just use it as list[0] */
1651 Py_INCREF(self);
1652 PyList_SET_ITEM(list, 0, (PyObject *)self);
1653 count++;
1654 break;
1655 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001656 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001658 if (i >= 0) {
1659 /* Only occurs when maxsplit was reached */
1660 /* Skip any remaining whitespace and copy to beginning of string */
1661 RSKIP_SPACE(s, i);
1662 if (i >= 0)
1663 SPLIT_ADD(s, 0, i + 1);
1664
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001665 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001666 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001667 if (PyList_Reverse(list) < 0)
1668 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001669 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001670 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001671 Py_DECREF(list);
1672 return NULL;
1673}
1674
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001675Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001676rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001677{
Skip Montanaro26015492007-12-08 15:33:24 +00001678 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001679 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001680 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001681 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001682
1683 if (list == NULL)
1684 return NULL;
1685
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001686 i = j = len - 1;
1687 while ((i >= 0) && (maxcount-- > 0)) {
1688 for (; i >= 0; i--) {
1689 if (s[i] == ch) {
1690 SPLIT_ADD(s, i + 1, j + 1);
1691 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001692 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001693 }
1694 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001695 }
Skip Montanaro26015492007-12-08 15:33:24 +00001696 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1697 /* ch not in self, so just use self as list[0] */
1698 Py_INCREF(self);
1699 PyList_SET_ITEM(list, 0, (PyObject *)self);
1700 count++;
1701 }
1702 else if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001703 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001704 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001705 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001706 if (PyList_Reverse(list) < 0)
1707 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001708 return list;
1709
1710 onError:
1711 Py_DECREF(list);
1712 return NULL;
1713}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714
1715PyDoc_STRVAR(rsplit__doc__,
1716"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1717\n\
1718Return a list of the words in the string S, using sep as the\n\
1719delimiter string, starting at the end of the string and working\n\
1720to the front. If maxsplit is given, at most maxsplit splits are\n\
1721done. If sep is not specified or is None, any whitespace string\n\
1722is a separator.");
1723
1724static PyObject *
1725string_rsplit(PyStringObject *self, PyObject *args)
1726{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001727 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001728 Py_ssize_t maxsplit = -1, count=0;
Skip Montanaro26015492007-12-08 15:33:24 +00001729 const char *s, *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001730 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001731
Martin v. Löwis9c830762006-04-13 08:37:17 +00001732 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001733 return NULL;
1734 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001735 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001736 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001737 return rsplit_whitespace(self, len, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001738 if (PyString_Check(subobj)) {
1739 sub = PyString_AS_STRING(subobj);
1740 n = PyString_GET_SIZE(subobj);
1741 }
1742#ifdef Py_USING_UNICODE
1743 else if (PyUnicode_Check(subobj))
1744 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1745#endif
1746 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1747 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001748
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001749 if (n == 0) {
1750 PyErr_SetString(PyExc_ValueError, "empty separator");
1751 return NULL;
1752 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001753 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001754 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001755
Andrew Dalke525eab32006-05-26 14:00:45 +00001756 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001757 if (list == NULL)
1758 return NULL;
1759
1760 j = len;
1761 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001762
Skip Montanaro26015492007-12-08 15:33:24 +00001763 s = PyString_AS_STRING(self);
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001764 while ( (i >= 0) && (maxsplit-- > 0) ) {
1765 for (; i>=0; i--) {
1766 if (Py_STRING_MATCH(s, i, sub, n)) {
1767 SPLIT_ADD(s, i + n, j);
1768 j = i;
1769 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001770 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001771 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001772 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001773 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001774 SPLIT_ADD(s, 0, j);
1775 FIX_PREALLOC_SIZE(list);
1776 if (PyList_Reverse(list) < 0)
1777 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001778 return list;
1779
Andrew Dalke525eab32006-05-26 14:00:45 +00001780onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001781 Py_DECREF(list);
1782 return NULL;
1783}
1784
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787"S.join(sequence) -> string\n\
1788\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001789Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791
1792static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001793string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794{
1795 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001796 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001799 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001800 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001801 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001802 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803
Tim Peters19fe14e2001-01-19 03:03:47 +00001804 seq = PySequence_Fast(orig, "");
1805 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001806 return NULL;
1807 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001808
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001809 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 if (seqlen == 0) {
1811 Py_DECREF(seq);
1812 return PyString_FromString("");
1813 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001815 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001816 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1817 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001818 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001819 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001820 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001822
Raymond Hettinger674f2412004-08-23 23:23:54 +00001823 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001824 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001825 * Do a pre-pass to figure out the total amount of space we'll
1826 * need (sz), see whether any argument is absurd, and defer to
1827 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001828 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001829 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001830 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001831 item = PySequence_Fast_GET_ITEM(seq, i);
1832 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001833#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001834 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001835 /* Defer to Unicode join.
1836 * CAUTION: There's no gurantee that the
1837 * original sequence can be iterated over
1838 * again, so we must pass seq here.
1839 */
1840 PyObject *result;
1841 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001842 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001843 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001844 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001845#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001846 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001847 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001848 " %.80s found",
Christian Heimese93237d2007-12-19 02:37:44 +00001849 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001850 Py_DECREF(seq);
1851 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001852 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001853 sz += PyString_GET_SIZE(item);
1854 if (i != 0)
1855 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001856 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001857 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001858 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001859 Py_DECREF(seq);
1860 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001862 }
1863
1864 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001865 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001866 if (res == NULL) {
1867 Py_DECREF(seq);
1868 return NULL;
1869 }
1870
1871 /* Catenate everything. */
1872 p = PyString_AS_STRING(res);
1873 for (i = 0; i < seqlen; ++i) {
1874 size_t n;
1875 item = PySequence_Fast_GET_ITEM(seq, i);
1876 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001877 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001878 p += n;
1879 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001880 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001881 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001882 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001884
Jeremy Hylton49048292000-07-11 03:28:17 +00001885 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887}
1888
Tim Peters52e155e2001-06-16 05:42:57 +00001889PyObject *
1890_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001891{
Tim Petersa7259592001-06-16 05:11:17 +00001892 assert(sep != NULL && PyString_Check(sep));
1893 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001894 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001895}
1896
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001897Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001898string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001899{
1900 if (*end > len)
1901 *end = len;
1902 else if (*end < 0)
1903 *end += len;
1904 if (*end < 0)
1905 *end = 0;
1906 if (*start < 0)
1907 *start += len;
1908 if (*start < 0)
1909 *start = 0;
1910}
1911
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001912Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001913string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001915 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001916 const char *sub;
1917 Py_ssize_t sub_len;
1918 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Facundo Batista57d56692007-11-16 18:04:14 +00001919 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920
Facundo Batista57d56692007-11-16 18:04:14 +00001921 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1922 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 return -2;
Facundo Batista57d56692007-11-16 18:04:14 +00001924 /* To support None in "start" and "end" arguments, meaning
1925 the same as if they were not passed.
1926 */
1927 if (obj_start != Py_None)
1928 if (!_PyEval_SliceIndex(obj_start, &start))
1929 return -2;
1930 if (obj_end != Py_None)
1931 if (!_PyEval_SliceIndex(obj_end, &end))
1932 return -2;
1933
Guido van Rossum4c08d552000-03-10 22:55:18 +00001934 if (PyString_Check(subobj)) {
1935 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001936 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001937 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001938#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001939 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001940 return PyUnicode_Find(
1941 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001942#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001943 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001944 /* XXX - the "expected a character buffer object" is pretty
1945 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946 return -2;
1947
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001948 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001949 return stringlib_find_slice(
1950 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1951 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001952 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001953 return stringlib_rfind_slice(
1954 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1955 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956}
1957
1958
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001959PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960"S.find(sub [,start [,end]]) -> int\n\
1961\n\
1962Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001963such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964arguments start and end are interpreted as in slice notation.\n\
1965\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001966Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967
1968static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001969string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001971 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 if (result == -2)
1973 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975}
1976
1977
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001978PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979"S.index(sub [,start [,end]]) -> int\n\
1980\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001981Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982
1983static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001984string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001986 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987 if (result == -2)
1988 return NULL;
1989 if (result == -1) {
1990 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001991 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992 return NULL;
1993 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001994 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995}
1996
1997
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001998PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999"S.rfind(sub [,start [,end]]) -> int\n\
2000\n\
2001Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00002002such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003arguments start and end are interpreted as in slice notation.\n\
2004\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002005Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006
2007static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002008string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002010 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011 if (result == -2)
2012 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002013 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014}
2015
2016
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002017PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018"S.rindex(sub [,start [,end]]) -> int\n\
2019\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002020Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021
2022static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002023string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002025 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026 if (result == -2)
2027 return NULL;
2028 if (result == -1) {
2029 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002030 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031 return NULL;
2032 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002033 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034}
2035
2036
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002037Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002038do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2039{
2040 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002041 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002042 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002043 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2044 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002045
2046 i = 0;
2047 if (striptype != RIGHTSTRIP) {
2048 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2049 i++;
2050 }
2051 }
2052
2053 j = len;
2054 if (striptype != LEFTSTRIP) {
2055 do {
2056 j--;
2057 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2058 j++;
2059 }
2060
2061 if (i == 0 && j == len && PyString_CheckExact(self)) {
2062 Py_INCREF(self);
2063 return (PyObject*)self;
2064 }
2065 else
2066 return PyString_FromStringAndSize(s+i, j-i);
2067}
2068
2069
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002070Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002071do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072{
2073 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002074 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076 i = 0;
2077 if (striptype != RIGHTSTRIP) {
2078 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2079 i++;
2080 }
2081 }
2082
2083 j = len;
2084 if (striptype != LEFTSTRIP) {
2085 do {
2086 j--;
2087 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2088 j++;
2089 }
2090
Tim Peters8fa5dd02001-09-12 02:18:30 +00002091 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092 Py_INCREF(self);
2093 return (PyObject*)self;
2094 }
2095 else
2096 return PyString_FromStringAndSize(s+i, j-i);
2097}
2098
2099
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002100Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002101do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2102{
2103 PyObject *sep = NULL;
2104
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002105 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002106 return NULL;
2107
2108 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002109 if (PyString_Check(sep))
2110 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002111#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002112 else if (PyUnicode_Check(sep)) {
2113 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2114 PyObject *res;
2115 if (uniself==NULL)
2116 return NULL;
2117 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2118 striptype, sep);
2119 Py_DECREF(uniself);
2120 return res;
2121 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002122#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002123 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002124#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002125 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002126#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002127 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002128#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002129 STRIPNAME(striptype));
2130 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002131 }
2132
2133 return do_strip(self, striptype);
2134}
2135
2136
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002137PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002138"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139\n\
2140Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002141whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002142If chars is given and not None, remove characters in chars instead.\n\
2143If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002144
2145static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002146string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002148 if (PyTuple_GET_SIZE(args) == 0)
2149 return do_strip(self, BOTHSTRIP); /* Common case */
2150 else
2151 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152}
2153
2154
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002155PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002156"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002158Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002159If chars is given and not None, remove characters in chars instead.\n\
2160If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161
2162static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002163string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002165 if (PyTuple_GET_SIZE(args) == 0)
2166 return do_strip(self, LEFTSTRIP); /* Common case */
2167 else
2168 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169}
2170
2171
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002172PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002173"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002175Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002176If chars is given and not None, remove characters in chars instead.\n\
2177If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178
2179static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002180string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002182 if (PyTuple_GET_SIZE(args) == 0)
2183 return do_strip(self, RIGHTSTRIP); /* Common case */
2184 else
2185 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186}
2187
2188
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002189PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190"S.lower() -> string\n\
2191\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002192Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193
/* _tolower and _toupper come from SUSv2 rather than ISO C, so fall back
   to the standard tolower() when the platform does not provide the macro. */
#if !defined(_tolower)
#define _tolower tolower
#endif
2198
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002200string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002201{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002202 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002203 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002204 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002206 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002207 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002209
2210 s = PyString_AS_STRING(newobj);
2211
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002212 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002213
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002215 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002216 if (isupper(c))
2217 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002219
Anthony Baxtera6286212006-04-11 07:42:36 +00002220 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221}
2222
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002223PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224"S.upper() -> string\n\
2225\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002226Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227
/* Fall back to the standard toupper() when the platform does not provide
   a _toupper macro. */
#if !defined(_toupper)
#define _toupper toupper
#endif
2231
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002233string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002235 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002236 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002237 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002239 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002240 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002242
2243 s = PyString_AS_STRING(newobj);
2244
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002245 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002246
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002248 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002249 if (islower(c))
2250 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002252
Anthony Baxtera6286212006-04-11 07:42:36 +00002253 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254}
2255
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002256PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257"S.title() -> string\n\
2258\n\
2259Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002260characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261
2262static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002263string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264{
2265 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002266 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002268 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269
Anthony Baxtera6286212006-04-11 07:42:36 +00002270 newobj = PyString_FromStringAndSize(NULL, n);
2271 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002273 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 for (i = 0; i < n; i++) {
2275 int c = Py_CHARMASK(*s++);
2276 if (islower(c)) {
2277 if (!previous_is_cased)
2278 c = toupper(c);
2279 previous_is_cased = 1;
2280 } else if (isupper(c)) {
2281 if (previous_is_cased)
2282 c = tolower(c);
2283 previous_is_cased = 1;
2284 } else
2285 previous_is_cased = 0;
2286 *s_new++ = c;
2287 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002288 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002289}
2290
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002291PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292"S.capitalize() -> string\n\
2293\n\
2294Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002295capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296
2297static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002298string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299{
2300 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002301 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002302 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303
Anthony Baxtera6286212006-04-11 07:42:36 +00002304 newobj = PyString_FromStringAndSize(NULL, n);
2305 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002307 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308 if (0 < n) {
2309 int c = Py_CHARMASK(*s++);
2310 if (islower(c))
2311 *s_new = toupper(c);
2312 else
2313 *s_new = c;
2314 s_new++;
2315 }
2316 for (i = 1; i < n; i++) {
2317 int c = Py_CHARMASK(*s++);
2318 if (isupper(c))
2319 *s_new = tolower(c);
2320 else
2321 *s_new = c;
2322 s_new++;
2323 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002324 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325}
2326
2327
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002328PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329"S.count(sub[, start[, end]]) -> int\n\
2330\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002331Return the number of non-overlapping occurrences of substring sub in\n\
2332string S[start:end]. Optional arguments start and end are interpreted\n\
2333as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334
2335static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002336string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002338 PyObject *sub_obj;
2339 const char *str = PyString_AS_STRING(self), *sub;
2340 Py_ssize_t sub_len;
2341 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002343 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2344 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002346
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002347 if (PyString_Check(sub_obj)) {
2348 sub = PyString_AS_STRING(sub_obj);
2349 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002351#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002352 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002353 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002354 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002355 if (count == -1)
2356 return NULL;
2357 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002358 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002359 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002360#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002361 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002362 return NULL;
2363
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002364 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002365
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002366 return PyInt_FromSsize_t(
2367 stringlib_count(str + start, end - start, sub, sub_len)
2368 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369}
2370
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002371PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372"S.swapcase() -> string\n\
2373\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002375converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376
2377static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002378string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379{
2380 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002381 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002382 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383
Anthony Baxtera6286212006-04-11 07:42:36 +00002384 newobj = PyString_FromStringAndSize(NULL, n);
2385 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002387 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388 for (i = 0; i < n; i++) {
2389 int c = Py_CHARMASK(*s++);
2390 if (islower(c)) {
2391 *s_new = toupper(c);
2392 }
2393 else if (isupper(c)) {
2394 *s_new = tolower(c);
2395 }
2396 else
2397 *s_new = c;
2398 s_new++;
2399 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002400 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401}
2402
2403
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002404PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002405"S.translate(table [,deletechars]) -> string\n\
2406\n\
2407Return a copy of the string S, where all characters occurring\n\
2408in the optional argument deletechars are removed, and the\n\
2409remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002410translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411
2412static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002413string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002416 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002417 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002419 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002420 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421 PyObject *result;
2422 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002425 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002426 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428
2429 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002430 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002431 tablen = PyString_GET_SIZE(tableobj);
2432 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002433 else if (tableobj == Py_None) {
2434 table = NULL;
2435 tablen = 256;
2436 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002437#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002438 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002439 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002440 parameter; instead a mapping to None will cause characters
2441 to be deleted. */
2442 if (delobj != NULL) {
2443 PyErr_SetString(PyExc_TypeError,
2444 "deletions are implemented differently for unicode");
2445 return NULL;
2446 }
2447 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2448 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002449#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002450 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002451 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452
Martin v. Löwis00b61272002-12-12 20:03:19 +00002453 if (tablen != 256) {
2454 PyErr_SetString(PyExc_ValueError,
2455 "translation table must be 256 characters long");
2456 return NULL;
2457 }
2458
Guido van Rossum4c08d552000-03-10 22:55:18 +00002459 if (delobj != NULL) {
2460 if (PyString_Check(delobj)) {
2461 del_table = PyString_AS_STRING(delobj);
2462 dellen = PyString_GET_SIZE(delobj);
2463 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002464#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 else if (PyUnicode_Check(delobj)) {
2466 PyErr_SetString(PyExc_TypeError,
2467 "deletions are implemented differently for unicode");
2468 return NULL;
2469 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002470#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002471 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2472 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002473 }
2474 else {
2475 del_table = NULL;
2476 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002477 }
2478
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002479 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480 result = PyString_FromStringAndSize((char *)NULL, inlen);
2481 if (result == NULL)
2482 return NULL;
2483 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002484 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002486 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002487 /* If no deletions are required, use faster code */
2488 for (i = inlen; --i >= 0; ) {
2489 c = Py_CHARMASK(*input++);
2490 if (Py_CHARMASK((*output++ = table[c])) != c)
2491 changed = 1;
2492 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002493 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002494 return result;
2495 Py_DECREF(result);
2496 Py_INCREF(input_obj);
2497 return input_obj;
2498 }
2499
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002500 if (table == NULL) {
2501 for (i = 0; i < 256; i++)
2502 trans_table[i] = Py_CHARMASK(i);
2503 } else {
2504 for (i = 0; i < 256; i++)
2505 trans_table[i] = Py_CHARMASK(table[i]);
2506 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002507
2508 for (i = 0; i < dellen; i++)
2509 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2510
2511 for (i = inlen; --i >= 0; ) {
2512 c = Py_CHARMASK(*input++);
2513 if (trans_table[c] != -1)
2514 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2515 continue;
2516 changed = 1;
2517 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002518 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002519 Py_DECREF(result);
2520 Py_INCREF(input_obj);
2521 return input_obj;
2522 }
2523 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002524 if (inlen > 0)
2525 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526 return result;
2527}
2528
2529
/* Search direction values (presumably what findstring()/countstring()
   receive as their `direction' argument -- confirm against callers). */
#define FORWARD   1
#define REVERSE  -1

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes
   of target, or NULL when there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2537
2538/* String ops must return a string. */
2539/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002540Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002541return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002542{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002543 if (PyString_CheckExact(self)) {
2544 Py_INCREF(self);
2545 return self;
2546 }
2547 return (PyStringObject *)PyString_FromStringAndSize(
2548 PyString_AS_STRING(self),
2549 PyString_GET_SIZE(self));
2550}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002551
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002552Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002553countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002554{
2555 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002556 const char *start=target;
2557 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002558
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559 while ( (start=findchar(start, end-start, c)) != NULL ) {
2560 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002561 if (count >= maxcount)
2562 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 start += 1;
2564 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002565 return count;
2566}
2567
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002568Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002569findstring(const char *target, Py_ssize_t target_len,
2570 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002571 Py_ssize_t start,
2572 Py_ssize_t end,
2573 int direction)
2574{
2575 if (start < 0) {
2576 start += target_len;
2577 if (start < 0)
2578 start = 0;
2579 }
2580 if (end > target_len) {
2581 end = target_len;
2582 } else if (end < 0) {
2583 end += target_len;
2584 if (end < 0)
2585 end = 0;
2586 }
2587
2588 /* zero-length substrings always match at the first attempt */
2589 if (pattern_len == 0)
2590 return (direction > 0) ? start : end;
2591
2592 end -= pattern_len;
2593
2594 if (direction < 0) {
2595 for (; end >= start; end--)
2596 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2597 return end;
2598 } else {
2599 for (; start <= end; start++)
2600 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2601 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002602 }
2603 return -1;
2604}
2605
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002606Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002607countstring(const char *target, Py_ssize_t target_len,
2608 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002609 Py_ssize_t start,
2610 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002611 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002612{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002613 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002615 if (start < 0) {
2616 start += target_len;
2617 if (start < 0)
2618 start = 0;
2619 }
2620 if (end > target_len) {
2621 end = target_len;
2622 } else if (end < 0) {
2623 end += target_len;
2624 if (end < 0)
2625 end = 0;
2626 }
2627
2628 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002629 if (pattern_len == 0 || maxcount == 0) {
2630 if (target_len+1 < maxcount)
2631 return target_len+1;
2632 return maxcount;
2633 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002634
2635 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002636 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002637 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002638 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2639 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002640 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002641 end -= pattern_len-1;
2642 }
2643 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002644 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002645 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2646 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002647 if (--maxcount <= 0)
2648 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002649 start += pattern_len-1;
2650 }
2651 }
2652 return count;
2653}
2654
2655
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002656/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002657
2658/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002659Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002660replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002661 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002662 Py_ssize_t maxcount)
2663{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002664 char *self_s, *result_s;
2665 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666 Py_ssize_t count, i, product;
2667 PyStringObject *result;
2668
2669 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002670
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002671 /* 1 at the end plus 1 after every character */
2672 count = self_len+1;
2673 if (maxcount < count)
2674 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002675
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002676 /* Check for overflow */
2677 /* result_len = count * to_len + self_len; */
2678 product = count * to_len;
2679 if (product / to_len != count) {
2680 PyErr_SetString(PyExc_OverflowError,
2681 "replace string is too long");
2682 return NULL;
2683 }
2684 result_len = product + self_len;
2685 if (result_len < 0) {
2686 PyErr_SetString(PyExc_OverflowError,
2687 "replace string is too long");
2688 return NULL;
2689 }
2690
2691 if (! (result = (PyStringObject *)
2692 PyString_FromStringAndSize(NULL, result_len)) )
2693 return NULL;
2694
2695 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002696 result_s = PyString_AS_STRING(result);
2697
2698 /* TODO: special case single character, which doesn't need memcpy */
2699
2700 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002701 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002702 result_s += to_len;
2703 count -= 1;
2704
2705 for (i=0; i<count; i++) {
2706 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002707 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002708 result_s += to_len;
2709 }
2710
2711 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002712 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002713
2714 return result;
2715}
2716
2717/* Special case for deleting a single character */
2718/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002719Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002720replace_delete_single_character(PyStringObject *self,
2721 char from_c, Py_ssize_t maxcount)
2722{
2723 char *self_s, *result_s;
2724 char *start, *next, *end;
2725 Py_ssize_t self_len, result_len;
2726 Py_ssize_t count;
2727 PyStringObject *result;
2728
2729 self_len = PyString_GET_SIZE(self);
2730 self_s = PyString_AS_STRING(self);
2731
Andrew Dalke51324072006-05-26 20:25:22 +00002732 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002733 if (count == 0) {
2734 return return_self(self);
2735 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002736
2737 result_len = self_len - count; /* from_len == 1 */
2738 assert(result_len>=0);
2739
2740 if ( (result = (PyStringObject *)
2741 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2742 return NULL;
2743 result_s = PyString_AS_STRING(result);
2744
2745 start = self_s;
2746 end = self_s + self_len;
2747 while (count-- > 0) {
2748 next = findchar(start, end-start, from_c);
2749 if (next == NULL)
2750 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002751 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 result_s += (next-start);
2753 start = next+1;
2754 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002755 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002756
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757 return result;
2758}
2759
2760/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2761
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002762Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002763replace_delete_substring(PyStringObject *self,
2764 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002766 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002767 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002768 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769 Py_ssize_t count, offset;
2770 PyStringObject *result;
2771
2772 self_len = PyString_GET_SIZE(self);
2773 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002774
2775 count = countstring(self_s, self_len,
2776 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002777 0, self_len, 1,
2778 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002779
2780 if (count == 0) {
2781 /* no matches */
2782 return return_self(self);
2783 }
2784
2785 result_len = self_len - (count * from_len);
2786 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002787
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 if ( (result = (PyStringObject *)
2789 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2790 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002791
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002792 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002793
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002794 start = self_s;
2795 end = self_s + self_len;
2796 while (count-- > 0) {
2797 offset = findstring(start, end-start,
2798 from_s, from_len,
2799 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002800 if (offset == -1)
2801 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002802 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002803
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002804 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002805
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002806 result_s += (next-start);
2807 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002808 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002809 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002811}
2812
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002813/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002814Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815replace_single_character_in_place(PyStringObject *self,
2816 char from_c, char to_c,
2817 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002818{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 char *self_s, *result_s, *start, *end, *next;
2820 Py_ssize_t self_len;
2821 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002822
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002823 /* The result string will be the same size */
2824 self_s = PyString_AS_STRING(self);
2825 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002826
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002828
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002829 if (next == NULL) {
2830 /* No matches; return the original string */
2831 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002832 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002833
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002834 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002835 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 if (result == NULL)
2837 return NULL;
2838 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002839 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002840
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002841 /* change everything in-place, starting with this one */
2842 start = result_s + (next-self_s);
2843 *start = to_c;
2844 start++;
2845 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002846
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847 while (--maxcount > 0) {
2848 next = findchar(start, end-start, from_c);
2849 if (next == NULL)
2850 break;
2851 *next = to_c;
2852 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002853 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002854
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002855 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002856}
2857
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002858/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002859Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002860replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002861 const char *from_s, Py_ssize_t from_len,
2862 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002863 Py_ssize_t maxcount)
2864{
2865 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002866 char *self_s;
2867 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002868 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002869
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002870 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002871
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 self_s = PyString_AS_STRING(self);
2873 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002874
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002875 offset = findstring(self_s, self_len,
2876 from_s, from_len,
2877 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002878 if (offset == -1) {
2879 /* No matches; return the original string */
2880 return return_self(self);
2881 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002882
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002883 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002884 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002885 if (result == NULL)
2886 return NULL;
2887 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002888 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002889
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002890 /* change everything in-place, starting with this one */
2891 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002892 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002893 start += from_len;
2894 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002895
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002896 while ( --maxcount > 0) {
2897 offset = findstring(start, end-start,
2898 from_s, from_len,
2899 0, end-start, FORWARD);
2900 if (offset==-1)
2901 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002902 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903 start += offset+from_len;
2904 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002905
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906 return result;
2907}
2908
2909/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002910Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002911replace_single_character(PyStringObject *self,
2912 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002913 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002914 Py_ssize_t maxcount)
2915{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002916 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002917 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002918 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919 Py_ssize_t count, product;
2920 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002921
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002922 self_s = PyString_AS_STRING(self);
2923 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002924
Andrew Dalke51324072006-05-26 20:25:22 +00002925 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002926 if (count == 0) {
2927 /* no matches, return unchanged */
2928 return return_self(self);
2929 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002930
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002931 /* use the difference between current and new, hence the "-1" */
2932 /* result_len = self_len + count * (to_len-1) */
2933 product = count * (to_len-1);
2934 if (product / (to_len-1) != count) {
2935 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2936 return NULL;
2937 }
2938 result_len = self_len + product;
2939 if (result_len < 0) {
2940 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2941 return NULL;
2942 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002943
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002944 if ( (result = (PyStringObject *)
2945 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2946 return NULL;
2947 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002948
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002949 start = self_s;
2950 end = self_s + self_len;
2951 while (count-- > 0) {
2952 next = findchar(start, end-start, from_c);
2953 if (next == NULL)
2954 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002955
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002956 if (next == start) {
2957 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002958 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959 result_s += to_len;
2960 start += 1;
2961 } else {
2962 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002963 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002964 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002965 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002966 result_s += to_len;
2967 start = next+1;
2968 }
2969 }
2970 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002971 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002972
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973 return result;
2974}
2975
2976/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002977Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002979 const char *from_s, Py_ssize_t from_len,
2980 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002981 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002982 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002983 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002984 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002985 Py_ssize_t count, offset, product;
2986 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002987
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002988 self_s = PyString_AS_STRING(self);
2989 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002990
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002991 count = countstring(self_s, self_len,
2992 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002993 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002994 if (count == 0) {
2995 /* no matches, return unchanged */
2996 return return_self(self);
2997 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002998
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002999 /* Check for overflow */
3000 /* result_len = self_len + count * (to_len-from_len) */
3001 product = count * (to_len-from_len);
3002 if (product / (to_len-from_len) != count) {
3003 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3004 return NULL;
3005 }
3006 result_len = self_len + product;
3007 if (result_len < 0) {
3008 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3009 return NULL;
3010 }
Neal Norwitza7edb112006-07-30 06:59:13 +00003011
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012 if ( (result = (PyStringObject *)
3013 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3014 return NULL;
3015 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00003016
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003017 start = self_s;
3018 end = self_s + self_len;
3019 while (count-- > 0) {
3020 offset = findstring(start, end-start,
3021 from_s, from_len,
3022 0, end-start, FORWARD);
3023 if (offset == -1)
3024 break;
3025 next = start+offset;
3026 if (next == start) {
3027 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003028 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003029 result_s += to_len;
3030 start += from_len;
3031 } else {
3032 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003033 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003034 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003035 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003036 result_s += to_len;
3037 start = next+from_len;
3038 }
3039 }
3040 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003041 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00003042
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003043 return result;
3044}
3045
3046
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003047Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003048replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003049 const char *from_s, Py_ssize_t from_len,
3050 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003051 Py_ssize_t maxcount)
3052{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053 if (maxcount < 0) {
3054 maxcount = PY_SSIZE_T_MAX;
3055 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3056 /* nothing to do; return the original string */
3057 return return_self(self);
3058 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003059
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003060 if (maxcount == 0 ||
3061 (from_len == 0 && to_len == 0)) {
3062 /* nothing to do; return the original string */
3063 return return_self(self);
3064 }
3065
3066 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003067
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003068 if (from_len == 0) {
3069 /* insert the 'to' string everywhere. */
3070 /* >>> "Python".replace("", ".") */
3071 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003072 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003073 }
3074
3075 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3076 /* point for an empty self string to generate a non-empty string */
3077 /* Special case so the remaining code always gets a non-empty string */
3078 if (PyString_GET_SIZE(self) == 0) {
3079 return return_self(self);
3080 }
3081
3082 if (to_len == 0) {
3083 /* delete all occurances of 'from' string */
3084 if (from_len == 1) {
3085 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003086 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003087 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003088 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003089 }
3090 }
3091
3092 /* Handle special case where both strings have the same length */
3093
3094 if (from_len == to_len) {
3095 if (from_len == 1) {
3096 return replace_single_character_in_place(
3097 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003098 from_s[0],
3099 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003100 maxcount);
3101 } else {
3102 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003103 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003104 }
3105 }
3106
3107 /* Otherwise use the more generic algorithms */
3108 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003109 return replace_single_character(self, from_s[0],
3110 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003111 } else {
3112 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003113 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003114 }
3115}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003116
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003117PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003118"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003119\n\
3120Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003121old replaced by new. If the optional argument count is\n\
3122given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003123
3124static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003125string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003126{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003127 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003128 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003129 const char *from_s, *to_s;
3130 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003131
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003132 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003133 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003134
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003135 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003136 from_s = PyString_AS_STRING(from);
3137 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003138 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003139#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003140 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003141 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003142 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003143#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003144 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145 return NULL;
3146
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003147 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003148 to_s = PyString_AS_STRING(to);
3149 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003151#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003152 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003153 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003154 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003155#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003156 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157 return NULL;
3158
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003159 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003160 from_s, from_len,
3161 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003162}
3163
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003164/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003165
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003166/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003167 * against substr, using the start and end arguments. Returns
3168 * -1 on error, 0 if not found and 1 if found.
3169 */
3170Py_LOCAL(int)
3171_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3172 Py_ssize_t end, int direction)
3173{
3174 Py_ssize_t len = PyString_GET_SIZE(self);
3175 Py_ssize_t slen;
3176 const char* sub;
3177 const char* str;
3178
3179 if (PyString_Check(substr)) {
3180 sub = PyString_AS_STRING(substr);
3181 slen = PyString_GET_SIZE(substr);
3182 }
3183#ifdef Py_USING_UNICODE
3184 else if (PyUnicode_Check(substr))
3185 return PyUnicode_Tailmatch((PyObject *)self,
3186 substr, start, end, direction);
3187#endif
3188 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3189 return -1;
3190 str = PyString_AS_STRING(self);
3191
3192 string_adjust_indices(&start, &end, len);
3193
3194 if (direction < 0) {
3195 /* startswith */
3196 if (start+slen > len)
3197 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003198 } else {
3199 /* endswith */
3200 if (end-start < slen || start > len)
3201 return 0;
3202
3203 if (end-slen > start)
3204 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003205 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003206 if (end-start >= slen)
3207 return ! memcmp(str+start, sub, slen);
3208 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003209}
3210
3211
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003212PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003213"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003214\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003215Return True if S starts with the specified prefix, False otherwise.\n\
3216With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003217With optional end, stop comparing S at that position.\n\
3218prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003219
3220static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003221string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003222{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003223 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003224 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003225 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003226 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003227
Guido van Rossumc6821402000-05-08 14:08:05 +00003228 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3229 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003230 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003231 if (PyTuple_Check(subobj)) {
3232 Py_ssize_t i;
3233 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3234 result = _string_tailmatch(self,
3235 PyTuple_GET_ITEM(subobj, i),
3236 start, end, -1);
3237 if (result == -1)
3238 return NULL;
3239 else if (result) {
3240 Py_RETURN_TRUE;
3241 }
3242 }
3243 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003244 }
Georg Brandl24250812006-06-09 18:45:48 +00003245 result = _string_tailmatch(self, subobj, start, end, -1);
3246 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003247 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003248 else
Georg Brandl24250812006-06-09 18:45:48 +00003249 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003250}
3251
3252
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003253PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003254"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003255\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003256Return True if S ends with the specified suffix, False otherwise.\n\
3257With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003258With optional end, stop comparing S at that position.\n\
3259suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003260
3261static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003262string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003263{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003264 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003265 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003266 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003267 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003268
Guido van Rossumc6821402000-05-08 14:08:05 +00003269 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3270 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003271 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003272 if (PyTuple_Check(subobj)) {
3273 Py_ssize_t i;
3274 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3275 result = _string_tailmatch(self,
3276 PyTuple_GET_ITEM(subobj, i),
3277 start, end, +1);
3278 if (result == -1)
3279 return NULL;
3280 else if (result) {
3281 Py_RETURN_TRUE;
3282 }
3283 }
3284 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003285 }
Georg Brandl24250812006-06-09 18:45:48 +00003286 result = _string_tailmatch(self, subobj, start, end, +1);
3287 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003288 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003289 else
Georg Brandl24250812006-06-09 18:45:48 +00003290 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003291}
3292
3293
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003294PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003295"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003296\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003297Encodes S using the codec registered for encoding. encoding defaults\n\
3298to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003299handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003300a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3301'xmlcharrefreplace' as well as any other name registered with\n\
3302codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003303
3304static PyObject *
3305string_encode(PyStringObject *self, PyObject *args)
3306{
3307 char *encoding = NULL;
3308 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003309 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003310
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003311 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3312 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003313 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003314 if (v == NULL)
3315 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003316 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3317 PyErr_Format(PyExc_TypeError,
3318 "encoder did not return a string/unicode object "
3319 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003320 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003321 Py_DECREF(v);
3322 return NULL;
3323 }
3324 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003325
3326 onError:
3327 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003328}
3329
3330
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003331PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003332"S.decode([encoding[,errors]]) -> object\n\
3333\n\
3334Decodes S using the codec registered for encoding. encoding defaults\n\
3335to the default encoding. errors may be given to set a different error\n\
3336handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003337a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3338as well as any other name registerd with codecs.register_error that is\n\
3339able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003340
3341static PyObject *
3342string_decode(PyStringObject *self, PyObject *args)
3343{
3344 char *encoding = NULL;
3345 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003346 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003347
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003348 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3349 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003350 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003351 if (v == NULL)
3352 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003353 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3354 PyErr_Format(PyExc_TypeError,
3355 "decoder did not return a string/unicode object "
3356 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003357 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003358 Py_DECREF(v);
3359 return NULL;
3360 }
3361 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003362
3363 onError:
3364 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003365}
3366
3367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003368PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003369"S.expandtabs([tabsize]) -> string\n\
3370\n\
3371Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003372If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003373
3374static PyObject*
3375string_expandtabs(PyStringObject *self, PyObject *args)
3376{
Guido van Rossum5bdff602008-03-11 21:18:06 +00003377 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003378 char *q;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003379 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003380 PyObject *u;
3381 int tabsize = 8;
3382
3383 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3384 return NULL;
3385
Thomas Wouters7e474022000-07-16 12:04:32 +00003386 /* First pass: determine size of output string */
Guido van Rossum5bdff602008-03-11 21:18:06 +00003387 i = 0; /* chars up to and including most recent \n or \r */
3388 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3389 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003390 for (p = PyString_AS_STRING(self); p < e; p++)
3391 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003392 if (tabsize > 0) {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003393 incr = tabsize - (j % tabsize);
3394 if (j > PY_SSIZE_T_MAX - incr)
3395 goto overflow1;
3396 j += incr;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003397 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398 }
3399 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003400 if (j > PY_SSIZE_T_MAX - 1)
3401 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003402 j++;
3403 if (*p == '\n' || *p == '\r') {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003404 if (i > PY_SSIZE_T_MAX - j)
3405 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406 i += j;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003407 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003408 }
3409 }
3410
Guido van Rossum5bdff602008-03-11 21:18:06 +00003411 if (i > PY_SSIZE_T_MAX - j)
3412 goto overflow1;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003413
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414 /* Second pass: create output string and fill it */
3415 u = PyString_FromStringAndSize(NULL, i + j);
3416 if (!u)
3417 return NULL;
3418
Guido van Rossum5bdff602008-03-11 21:18:06 +00003419 j = 0; /* same as in first pass */
3420 q = PyString_AS_STRING(u); /* next output char */
3421 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422
3423 for (p = PyString_AS_STRING(self); p < e; p++)
3424 if (*p == '\t') {
3425 if (tabsize > 0) {
3426 i = tabsize - (j % tabsize);
3427 j += i;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003428 while (i--) {
3429 if (q >= qe)
3430 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431 *q++ = ' ';
Guido van Rossum5bdff602008-03-11 21:18:06 +00003432 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433 }
3434 }
3435 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003436 if (q >= qe)
3437 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438 *q++ = *p;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003439 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440 if (*p == '\n' || *p == '\r')
3441 j = 0;
3442 }
3443
3444 return u;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003445
3446 overflow2:
3447 Py_DECREF(u);
3448 overflow1:
3449 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3450 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003451}
3452
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003453Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003454pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003455{
3456 PyObject *u;
3457
3458 if (left < 0)
3459 left = 0;
3460 if (right < 0)
3461 right = 0;
3462
Tim Peters8fa5dd02001-09-12 02:18:30 +00003463 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003464 Py_INCREF(self);
3465 return (PyObject *)self;
3466 }
3467
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003468 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469 left + PyString_GET_SIZE(self) + right);
3470 if (u) {
3471 if (left)
3472 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003473 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003474 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003475 PyString_GET_SIZE(self));
3476 if (right)
3477 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3478 fill, right);
3479 }
3480
3481 return u;
3482}
3483
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003484PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003485"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003486"\n"
3487"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003488"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003489
3490static PyObject *
3491string_ljust(PyStringObject *self, PyObject *args)
3492{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003493 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003494 char fillchar = ' ';
3495
Thomas Wouters4abb3662006-04-19 14:50:15 +00003496 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003497 return NULL;
3498
Tim Peters8fa5dd02001-09-12 02:18:30 +00003499 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500 Py_INCREF(self);
3501 return (PyObject*) self;
3502 }
3503
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003504 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505}
3506
3507
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003508PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003509"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003510"\n"
3511"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003512"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003513
3514static PyObject *
3515string_rjust(PyStringObject *self, PyObject *args)
3516{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003517 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003518 char fillchar = ' ';
3519
Thomas Wouters4abb3662006-04-19 14:50:15 +00003520 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003521 return NULL;
3522
Tim Peters8fa5dd02001-09-12 02:18:30 +00003523 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003524 Py_INCREF(self);
3525 return (PyObject*) self;
3526 }
3527
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003528 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529}
3530
3531
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003532PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003533"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003534"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003535"Return S centered in a string of length width. Padding is\n"
3536"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537
3538static PyObject *
3539string_center(PyStringObject *self, PyObject *args)
3540{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003541 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003542 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003543 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544
Thomas Wouters4abb3662006-04-19 14:50:15 +00003545 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546 return NULL;
3547
Tim Peters8fa5dd02001-09-12 02:18:30 +00003548 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549 Py_INCREF(self);
3550 return (PyObject*) self;
3551 }
3552
3553 marg = width - PyString_GET_SIZE(self);
3554 left = marg / 2 + (marg & width & 1);
3555
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003556 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557}
3558
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003559PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003560"S.zfill(width) -> string\n"
3561"\n"
3562"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003563"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003564
3565static PyObject *
3566string_zfill(PyStringObject *self, PyObject *args)
3567{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003568 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003569 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003570 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003571 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003572
Thomas Wouters4abb3662006-04-19 14:50:15 +00003573 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003574 return NULL;
3575
3576 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003577 if (PyString_CheckExact(self)) {
3578 Py_INCREF(self);
3579 return (PyObject*) self;
3580 }
3581 else
3582 return PyString_FromStringAndSize(
3583 PyString_AS_STRING(self),
3584 PyString_GET_SIZE(self)
3585 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003586 }
3587
3588 fill = width - PyString_GET_SIZE(self);
3589
3590 s = pad(self, fill, 0, '0');
3591
3592 if (s == NULL)
3593 return NULL;
3594
3595 p = PyString_AS_STRING(s);
3596 if (p[fill] == '+' || p[fill] == '-') {
3597 /* move sign to beginning of string */
3598 p[0] = p[fill];
3599 p[fill] = '0';
3600 }
3601
3602 return (PyObject*) s;
3603}
3604
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003605PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003606"S.isspace() -> bool\n\
3607\n\
3608Return True if all characters in S are whitespace\n\
3609and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003610
3611static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003612string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003613{
Fred Drakeba096332000-07-09 07:04:36 +00003614 register const unsigned char *p
3615 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003616 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618 /* Shortcut for single character strings */
3619 if (PyString_GET_SIZE(self) == 1 &&
3620 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003623 /* Special case for empty strings */
3624 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003625 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003626
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627 e = p + PyString_GET_SIZE(self);
3628 for (; p < e; p++) {
3629 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003632 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633}
3634
3635
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003636PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003638\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003639Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003640and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003641
3642static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003643string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003644{
Fred Drakeba096332000-07-09 07:04:36 +00003645 register const unsigned char *p
3646 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003647 register const unsigned char *e;
3648
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003649 /* Shortcut for single character strings */
3650 if (PyString_GET_SIZE(self) == 1 &&
3651 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003652 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003653
3654 /* Special case for empty strings */
3655 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003657
3658 e = p + PyString_GET_SIZE(self);
3659 for (; p < e; p++) {
3660 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003662 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003663 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003664}
3665
3666
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003667PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003669\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003670Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003671and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003672
3673static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003674string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003675{
Fred Drakeba096332000-07-09 07:04:36 +00003676 register const unsigned char *p
3677 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003678 register const unsigned char *e;
3679
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003680 /* Shortcut for single character strings */
3681 if (PyString_GET_SIZE(self) == 1 &&
3682 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003683 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003684
3685 /* Special case for empty strings */
3686 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003687 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003688
3689 e = p + PyString_GET_SIZE(self);
3690 for (; p < e; p++) {
3691 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003693 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003695}
3696
3697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003698PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003701Return True if all characters in S are digits\n\
3702and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703
3704static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003705string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706{
Fred Drakeba096332000-07-09 07:04:36 +00003707 register const unsigned char *p
3708 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003709 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711 /* Shortcut for single character strings */
3712 if (PyString_GET_SIZE(self) == 1 &&
3713 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003714 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003716 /* Special case for empty strings */
3717 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003719
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 e = p + PyString_GET_SIZE(self);
3721 for (; p < e; p++) {
3722 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003725 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726}
3727
3728
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003729PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003730"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003732Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003733at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734
3735static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003736string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737{
Fred Drakeba096332000-07-09 07:04:36 +00003738 register const unsigned char *p
3739 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003740 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741 int cased;
3742
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743 /* Shortcut for single character strings */
3744 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003745 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003747 /* Special case for empty strings */
3748 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003749 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003750
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751 e = p + PyString_GET_SIZE(self);
3752 cased = 0;
3753 for (; p < e; p++) {
3754 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003755 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756 else if (!cased && islower(*p))
3757 cased = 1;
3758 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760}
3761
3762
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003763PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003764"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003766Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003767at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003768
3769static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003770string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771{
Fred Drakeba096332000-07-09 07:04:36 +00003772 register const unsigned char *p
3773 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003774 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003775 int cased;
3776
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 /* Shortcut for single character strings */
3778 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003779 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003780
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003781 /* Special case for empty strings */
3782 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003783 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003784
Guido van Rossum4c08d552000-03-10 22:55:18 +00003785 e = p + PyString_GET_SIZE(self);
3786 cased = 0;
3787 for (; p < e; p++) {
3788 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003789 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003790 else if (!cased && isupper(*p))
3791 cased = 1;
3792 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003793 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794}
3795
3796
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003797PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003798"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003800Return True if S is a titlecased string and there is at least one\n\
3801character in S, i.e. uppercase characters may only follow uncased\n\
3802characters and lowercase characters only cased ones. Return False\n\
3803otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804
3805static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003806string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003807{
Fred Drakeba096332000-07-09 07:04:36 +00003808 register const unsigned char *p
3809 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003810 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811 int cased, previous_is_cased;
3812
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813 /* Shortcut for single character strings */
3814 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003815 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003817 /* Special case for empty strings */
3818 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003819 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003820
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 e = p + PyString_GET_SIZE(self);
3822 cased = 0;
3823 previous_is_cased = 0;
3824 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003825 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826
3827 if (isupper(ch)) {
3828 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003829 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830 previous_is_cased = 1;
3831 cased = 1;
3832 }
3833 else if (islower(ch)) {
3834 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003835 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836 previous_is_cased = 1;
3837 cased = 1;
3838 }
3839 else
3840 previous_is_cased = 0;
3841 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003842 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003843}
3844
3845
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003846PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003847"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003848\n\
3849Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003850Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003851is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003852
Guido van Rossum4c08d552000-03-10 22:55:18 +00003853static PyObject*
3854string_splitlines(PyStringObject *self, PyObject *args)
3855{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003856 register Py_ssize_t i;
3857 register Py_ssize_t j;
3858 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003859 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003860 PyObject *list;
3861 PyObject *str;
3862 char *data;
3863
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003864 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003865 return NULL;
3866
3867 data = PyString_AS_STRING(self);
3868 len = PyString_GET_SIZE(self);
3869
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003870 /* This does not use the preallocated list because splitlines is
3871 usually run with hundreds of newlines. The overhead of
3872 switching between PyList_SET_ITEM and append causes about a
3873 2-3% slowdown for that common case. A smarter implementation
3874 could move the if check out, so the SET_ITEMs are done first
3875 and the appends only done when the prealloc buffer is full.
3876 That's too much work for little gain.*/
3877
Guido van Rossum4c08d552000-03-10 22:55:18 +00003878 list = PyList_New(0);
3879 if (!list)
3880 goto onError;
3881
3882 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003883 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003884
Guido van Rossum4c08d552000-03-10 22:55:18 +00003885 /* Find a line and append it */
3886 while (i < len && data[i] != '\n' && data[i] != '\r')
3887 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003888
3889 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003890 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003891 if (i < len) {
3892 if (data[i] == '\r' && i + 1 < len &&
3893 data[i+1] == '\n')
3894 i += 2;
3895 else
3896 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003897 if (keepends)
3898 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003899 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003900 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003901 j = i;
3902 }
3903 if (j < len) {
3904 SPLIT_APPEND(data, j, len);
3905 }
3906
3907 return list;
3908
3909 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003910 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003911 return NULL;
3912}
3913
3914#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003915#undef SPLIT_ADD
3916#undef MAX_PREALLOC
3917#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003918
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003919static PyObject *
3920string_getnewargs(PyStringObject *v)
3921{
Christian Heimese93237d2007-12-19 02:37:44 +00003922 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003923}
3924
Eric Smitha9f7d622008-02-17 19:46:49 +00003925
3926#include "stringlib/string_format.h"
3927
3928PyDoc_STRVAR(format__doc__,
3929"S.format(*args, **kwargs) -> unicode\n\
3930\n\
3931");
3932
3933PyDoc_STRVAR(p_format__doc__,
3934"S.__format__(format_spec) -> unicode\n\
3935\n\
3936");
3937
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003938
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003939static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003940string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003941 /* Counterparts of the obsolete stropmodule functions; except
3942 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003943 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3944 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003945 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003946 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3947 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003948 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3949 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3950 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3951 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3952 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3953 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3954 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003955 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3956 capitalize__doc__},
3957 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3958 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3959 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003960 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003961 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3962 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3963 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3964 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3965 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3966 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3967 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003968 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3969 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003970 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3971 startswith__doc__},
3972 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3973 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3974 swapcase__doc__},
3975 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3976 translate__doc__},
3977 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3978 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3979 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3980 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3981 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Eric Smitha9f7d622008-02-17 19:46:49 +00003982 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3983 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3984 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3985 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003986 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3987 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3988 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3989 expandtabs__doc__},
3990 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3991 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003992 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003993 {NULL, NULL} /* sentinel */
3994};
3995
Jeremy Hylton938ace62002-07-17 16:30:39 +00003996static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003997str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3998
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003999static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004000string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004001{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004002 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004003 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004004
Guido van Rossumae960af2001-08-30 03:11:59 +00004005 if (type != &PyString_Type)
4006 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004007 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4008 return NULL;
4009 if (x == NULL)
4010 return PyString_FromString("");
4011 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004012}
4013
Guido van Rossumae960af2001-08-30 03:11:59 +00004014static PyObject *
4015str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4016{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004017 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004018 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004019
4020 assert(PyType_IsSubtype(type, &PyString_Type));
4021 tmp = string_new(&PyString_Type, args, kwds);
4022 if (tmp == NULL)
4023 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004024 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004025 n = PyString_GET_SIZE(tmp);
4026 pnew = type->tp_alloc(type, n);
4027 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004028 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004029 ((PyStringObject *)pnew)->ob_shash =
4030 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004031 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004032 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004033 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004034 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004035}
4036
Guido van Rossumcacfc072002-05-24 19:01:59 +00004037static PyObject *
4038basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4039{
4040 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004041 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004042 return NULL;
4043}
4044
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004045static PyObject *
4046string_mod(PyObject *v, PyObject *w)
4047{
4048 if (!PyString_Check(v)) {
4049 Py_INCREF(Py_NotImplemented);
4050 return Py_NotImplemented;
4051 }
4052 return PyString_Format(v, w);
4053}
4054
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004055PyDoc_STRVAR(basestring_doc,
4056"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004057
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004058static PyNumberMethods string_as_number = {
4059 0, /*nb_add*/
4060 0, /*nb_subtract*/
4061 0, /*nb_multiply*/
4062 0, /*nb_divide*/
4063 string_mod, /*nb_remainder*/
4064};
4065
4066
Guido van Rossumcacfc072002-05-24 19:01:59 +00004067PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004068 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004069 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004070 0,
4071 0,
4072 0, /* tp_dealloc */
4073 0, /* tp_print */
4074 0, /* tp_getattr */
4075 0, /* tp_setattr */
4076 0, /* tp_compare */
4077 0, /* tp_repr */
4078 0, /* tp_as_number */
4079 0, /* tp_as_sequence */
4080 0, /* tp_as_mapping */
4081 0, /* tp_hash */
4082 0, /* tp_call */
4083 0, /* tp_str */
4084 0, /* tp_getattro */
4085 0, /* tp_setattro */
4086 0, /* tp_as_buffer */
4087 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4088 basestring_doc, /* tp_doc */
4089 0, /* tp_traverse */
4090 0, /* tp_clear */
4091 0, /* tp_richcompare */
4092 0, /* tp_weaklistoffset */
4093 0, /* tp_iter */
4094 0, /* tp_iternext */
4095 0, /* tp_methods */
4096 0, /* tp_members */
4097 0, /* tp_getset */
4098 &PyBaseObject_Type, /* tp_base */
4099 0, /* tp_dict */
4100 0, /* tp_descr_get */
4101 0, /* tp_descr_set */
4102 0, /* tp_dictoffset */
4103 0, /* tp_init */
4104 0, /* tp_alloc */
4105 basestring_new, /* tp_new */
4106 0, /* tp_free */
4107};
4108
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004109PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004110"str(object) -> string\n\
4111\n\
4112Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004113If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004114
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004115PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004116 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004117 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004118 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004119 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004120 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004121 (printfunc)string_print, /* tp_print */
4122 0, /* tp_getattr */
4123 0, /* tp_setattr */
4124 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004125 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004126 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004127 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004128 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004129 (hashfunc)string_hash, /* tp_hash */
4130 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004131 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004132 PyObject_GenericGetAttr, /* tp_getattro */
4133 0, /* tp_setattro */
4134 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004135 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Christian Heimes1a6387e2008-03-26 12:49:49 +00004136 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4137 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004138 string_doc, /* tp_doc */
4139 0, /* tp_traverse */
4140 0, /* tp_clear */
4141 (richcmpfunc)string_richcompare, /* tp_richcompare */
4142 0, /* tp_weaklistoffset */
4143 0, /* tp_iter */
4144 0, /* tp_iternext */
4145 string_methods, /* tp_methods */
4146 0, /* tp_members */
4147 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004148 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004149 0, /* tp_dict */
4150 0, /* tp_descr_get */
4151 0, /* tp_descr_set */
4152 0, /* tp_dictoffset */
4153 0, /* tp_init */
4154 0, /* tp_alloc */
4155 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004156 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004157};
4158
4159void
Fred Drakeba096332000-07-09 07:04:36 +00004160PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004161{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004162 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004163 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004164 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004165 if (w == NULL || !PyString_Check(*pv)) {
4166 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004167 *pv = NULL;
4168 return;
4169 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004170 v = string_concat((PyStringObject *) *pv, w);
4171 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004172 *pv = v;
4173}
4174
Guido van Rossum013142a1994-08-30 08:19:36 +00004175void
Fred Drakeba096332000-07-09 07:04:36 +00004176PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004177{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004178 PyString_Concat(pv, w);
4179 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004180}
4181
4182
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004183/* The following function breaks the notion that strings are immutable:
4184 it changes the size of a string. We get away with this only if there
4185 is only one module referencing the object. You can also think of it
4186 as creating a new string object and destroying the old one, only
4187 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004188 already be known to some other part of the code...
4189 Note that if there's not enough memory to resize the string, the original
4190 string object at *pv is deallocated, *pv is set to NULL, an "out of
4191 memory" exception is set, and -1 is returned. Else (on success) 0 is
4192 returned, and the value in *pv may or may not be the same as on input.
4193 As always, an extra byte is allocated for a trailing \0 byte (newsize
4194 does *not* include that), and a trailing \0 byte is stored.
4195*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004196
4197int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004198_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004199{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004200 register PyObject *v;
4201 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004202 v = *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004203 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004204 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004205 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004206 Py_DECREF(v);
4207 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004208 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004209 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004210 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004211 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004212 _Py_ForgetReference(v);
4213 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004214 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004215 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004216 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004217 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004218 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004219 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004220 _Py_NewReference(*pv);
4221 sv = (PyStringObject *) *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004222 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004223 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004224 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004225 return 0;
4226}
Guido van Rossume5372401993-03-16 12:15:04 +00004227
4228/* Helpers for formatstring */
4229
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004230Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004231getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004232{
Thomas Wouters977485d2006-02-16 15:59:12 +00004233 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004234 if (argidx < arglen) {
4235 (*p_argidx)++;
4236 if (arglen < 0)
4237 return args;
4238 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004239 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004240 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004241 PyErr_SetString(PyExc_TypeError,
4242 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004243 return NULL;
4244}
4245
Tim Peters38fd5b62000-09-21 05:43:11 +00004246/* Format codes
4247 * F_LJUST '-'
4248 * F_SIGN '+'
4249 * F_BLANK ' '
4250 * F_ALT '#'
4251 * F_ZERO '0'
4252 */
Guido van Rossume5372401993-03-16 12:15:04 +00004253#define F_LJUST (1<<0)
4254#define F_SIGN (1<<1)
4255#define F_BLANK (1<<2)
4256#define F_ALT (1<<3)
4257#define F_ZERO (1<<4)
4258
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004259Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004260formatfloat(char *buf, size_t buflen, int flags,
4261 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004262{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004263 /* fmt = '%#.' + `prec` + `type`
4264 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004265 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004266 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004267 x = PyFloat_AsDouble(v);
4268 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004269 PyErr_Format(PyExc_TypeError, "float argument required, "
Christian Heimese93237d2007-12-19 02:37:44 +00004270 "not %.200s", Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004271 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004272 }
Guido van Rossume5372401993-03-16 12:15:04 +00004273 if (prec < 0)
4274 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004275 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4276 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004277 /* Worst case length calc to ensure no buffer overrun:
4278
4279 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004280 fmt = %#.<prec>g
4281 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004282 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004283 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004284
4285 'f' formats:
4286 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4287 len = 1 + 50 + 1 + prec = 52 + prec
4288
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004289 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004290 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004291
4292 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004293 if (((type == 'g' || type == 'G') &&
4294 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004295 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004296 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004297 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004298 return -1;
4299 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004300 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4301 (flags&F_ALT) ? "#" : "",
4302 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004303 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004304 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004305}
4306
Tim Peters38fd5b62000-09-21 05:43:11 +00004307/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4308 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4309 * Python's regular ints.
4310 * Return value: a new PyString*, or NULL if error.
4311 * . *pbuf is set to point into it,
4312 * *plen set to the # of chars following that.
4313 * Caller must decref it when done using pbuf.
4314 * The string starting at *pbuf is of the form
4315 * "-"? ("0x" | "0X")? digit+
4316 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004317 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004318 * There will be at least prec digits, zero-filled on the left if
4319 * necessary to get that many.
4320 * val object to be converted
4321 * flags bitmask of format flags; only F_ALT is looked at
4322 * prec minimum number of digits; 0-fill on left if needed
4323 * type a character in [duoxX]; u acts the same as d
4324 *
4325 * CAUTION: o, x and X conversions on regular ints can never
4326 * produce a '-' sign, but can for Python's unbounded ints.
4327 */
4328PyObject*
4329_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4330 char **pbuf, int *plen)
4331{
4332 PyObject *result = NULL;
4333 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004334 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004335 int sign; /* 1 if '-', else 0 */
4336 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004337 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004338 int numdigits; /* len == numnondigits + numdigits */
4339 int numnondigits = 0;
4340
4341 switch (type) {
4342 case 'd':
4343 case 'u':
Christian Heimese93237d2007-12-19 02:37:44 +00004344 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004345 break;
4346 case 'o':
Christian Heimese93237d2007-12-19 02:37:44 +00004347 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004348 break;
4349 case 'x':
4350 case 'X':
4351 numnondigits = 2;
Christian Heimese93237d2007-12-19 02:37:44 +00004352 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004353 break;
4354 default:
4355 assert(!"'type' not in [duoxX]");
4356 }
4357 if (!result)
4358 return NULL;
4359
Neal Norwitz56423e52006-08-13 18:11:08 +00004360 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004361 if (!buf) {
4362 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004363 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004364 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004365
Tim Peters38fd5b62000-09-21 05:43:11 +00004366 /* To modify the string in-place, there can only be one reference. */
Christian Heimese93237d2007-12-19 02:37:44 +00004367 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004368 PyErr_BadInternalCall();
4369 return NULL;
4370 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004371 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004372 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004373 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4374 return NULL;
4375 }
4376 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004377 if (buf[len-1] == 'L') {
4378 --len;
4379 buf[len] = '\0';
4380 }
4381 sign = buf[0] == '-';
4382 numnondigits += sign;
4383 numdigits = len - numnondigits;
4384 assert(numdigits > 0);
4385
Tim Petersfff53252001-04-12 18:38:48 +00004386 /* Get rid of base marker unless F_ALT */
4387 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004388 /* Need to skip 0x, 0X or 0. */
4389 int skipped = 0;
4390 switch (type) {
4391 case 'o':
4392 assert(buf[sign] == '0');
4393 /* If 0 is only digit, leave it alone. */
4394 if (numdigits > 1) {
4395 skipped = 1;
4396 --numdigits;
4397 }
4398 break;
4399 case 'x':
4400 case 'X':
4401 assert(buf[sign] == '0');
4402 assert(buf[sign + 1] == 'x');
4403 skipped = 2;
4404 numnondigits -= 2;
4405 break;
4406 }
4407 if (skipped) {
4408 buf += skipped;
4409 len -= skipped;
4410 if (sign)
4411 buf[0] = '-';
4412 }
4413 assert(len == numnondigits + numdigits);
4414 assert(numdigits > 0);
4415 }
4416
4417 /* Fill with leading zeroes to meet minimum width. */
4418 if (prec > numdigits) {
4419 PyObject *r1 = PyString_FromStringAndSize(NULL,
4420 numnondigits + prec);
4421 char *b1;
4422 if (!r1) {
4423 Py_DECREF(result);
4424 return NULL;
4425 }
4426 b1 = PyString_AS_STRING(r1);
4427 for (i = 0; i < numnondigits; ++i)
4428 *b1++ = *buf++;
4429 for (i = 0; i < prec - numdigits; i++)
4430 *b1++ = '0';
4431 for (i = 0; i < numdigits; i++)
4432 *b1++ = *buf++;
4433 *b1 = '\0';
4434 Py_DECREF(result);
4435 result = r1;
4436 buf = PyString_AS_STRING(result);
4437 len = numnondigits + prec;
4438 }
4439
4440 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004441 if (type == 'X') {
4442 /* Need to convert all lower case letters to upper case.
4443 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004444 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004445 if (buf[i] >= 'a' && buf[i] <= 'x')
4446 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004447 }
4448 *pbuf = buf;
4449 *plen = len;
4450 return result;
4451}
4452
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004453Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004454formatint(char *buf, size_t buflen, int flags,
4455 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004456{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004457 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004458 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4459 + 1 + 1 = 24 */
4460 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004461 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004462 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004463
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004464 x = PyInt_AsLong(v);
4465 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004466 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00004467 Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004468 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004469 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004470 if (x < 0 && type == 'u') {
4471 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004472 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004473 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4474 sign = "-";
4475 else
4476 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004477 if (prec < 0)
4478 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004479
4480 if ((flags & F_ALT) &&
4481 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004482 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004483 * of issues that cause pain:
4484 * - when 0 is being converted, the C standard leaves off
4485 * the '0x' or '0X', which is inconsistent with other
4486 * %#x/%#X conversions and inconsistent with Python's
4487 * hex() function
4488 * - there are platforms that violate the standard and
4489 * convert 0 with the '0x' or '0X'
4490 * (Metrowerks, Compaq Tru64)
4491 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004492 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004493 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004494 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004495 * We can achieve the desired consistency by inserting our
4496 * own '0x' or '0X' prefix, and substituting %x/%X in place
4497 * of %#x/%#X.
4498 *
4499 * Note that this is the same approach as used in
4500 * formatint() in unicodeobject.c
4501 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004502 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4503 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004504 }
4505 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004506 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4507 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004508 prec, type);
4509 }
4510
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004511 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4512 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004513 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004514 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004515 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004516 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004517 return -1;
4518 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004519 if (sign[0])
4520 PyOS_snprintf(buf, buflen, fmt, -x);
4521 else
4522 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004523 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004524}
4525
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004526Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004527formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004528{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004529 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004530 if (PyString_Check(v)) {
4531 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004532 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004533 }
4534 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004535 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004536 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004537 }
4538 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004539 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004540}
4541
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004542/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4543
4544 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4545 chars are formatted. XXX This is a magic number. Each formatting
4546 routine does bounds checking to ensure no overflow, but a better
4547 solution may be to malloc a buffer of appropriate size for each
4548 format. For now, the current solution is sufficient.
4549*/
4550#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004551
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004552PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004553PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004554{
4555 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004556 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004557 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004558 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004559 PyObject *result, *orig_args;
4560#ifdef Py_USING_UNICODE
4561 PyObject *v, *w;
4562#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004563 PyObject *dict = NULL;
4564 if (format == NULL || !PyString_Check(format) || args == NULL) {
4565 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004566 return NULL;
4567 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004568 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004569 fmt = PyString_AS_STRING(format);
4570 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004571 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004572 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004573 if (result == NULL)
4574 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004575 res = PyString_AsString(result);
4576 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004577 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004578 argidx = 0;
4579 }
4580 else {
4581 arglen = -1;
4582 argidx = -2;
4583 }
Christian Heimese93237d2007-12-19 02:37:44 +00004584 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004585 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004586 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004587 while (--fmtcnt >= 0) {
4588 if (*fmt != '%') {
4589 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004590 rescnt = fmtcnt + 100;
4591 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004592 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004593 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004594 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004595 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004596 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004597 }
4598 *res++ = *fmt++;
4599 }
4600 else {
4601 /* Got a format specifier */
4602 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004603 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004604 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004605 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004606 int fill;
Facundo Batistac11cecf2008-02-24 03:17:21 +00004607 int isnumok;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004608 PyObject *v = NULL;
4609 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004610 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004611 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004612 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004613 char formatbuf[FORMATBUFLEN];
4614 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004615#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004616 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004617 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004618#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004619
Guido van Rossumda9c2711996-12-05 21:58:58 +00004620 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004621 if (*fmt == '(') {
4622 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004623 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004624 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004625 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004626
4627 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004628 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004629 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004630 goto error;
4631 }
4632 ++fmt;
4633 --fmtcnt;
4634 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004635 /* Skip over balanced parentheses */
4636 while (pcount > 0 && --fmtcnt >= 0) {
4637 if (*fmt == ')')
4638 --pcount;
4639 else if (*fmt == '(')
4640 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004641 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004642 }
4643 keylen = fmt - keystart - 1;
4644 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004645 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004646 "incomplete format key");
4647 goto error;
4648 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004649 key = PyString_FromStringAndSize(keystart,
4650 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004651 if (key == NULL)
4652 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004653 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004654 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004655 args_owned = 0;
4656 }
4657 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004658 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004659 if (args == NULL) {
4660 goto error;
4661 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004662 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004663 arglen = -1;
4664 argidx = -2;
4665 }
Guido van Rossume5372401993-03-16 12:15:04 +00004666 while (--fmtcnt >= 0) {
4667 switch (c = *fmt++) {
4668 case '-': flags |= F_LJUST; continue;
4669 case '+': flags |= F_SIGN; continue;
4670 case ' ': flags |= F_BLANK; continue;
4671 case '#': flags |= F_ALT; continue;
4672 case '0': flags |= F_ZERO; continue;
4673 }
4674 break;
4675 }
4676 if (c == '*') {
4677 v = getnextarg(args, arglen, &argidx);
4678 if (v == NULL)
4679 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004680 if (!PyInt_Check(v)) {
4681 PyErr_SetString(PyExc_TypeError,
4682 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004683 goto error;
4684 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004685 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004686 if (width < 0) {
4687 flags |= F_LJUST;
4688 width = -width;
4689 }
Guido van Rossume5372401993-03-16 12:15:04 +00004690 if (--fmtcnt >= 0)
4691 c = *fmt++;
4692 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004693 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004694 width = c - '0';
4695 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004696 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004697 if (!isdigit(c))
4698 break;
4699 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004700 PyErr_SetString(
4701 PyExc_ValueError,
4702 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004703 goto error;
4704 }
4705 width = width*10 + (c - '0');
4706 }
4707 }
4708 if (c == '.') {
4709 prec = 0;
4710 if (--fmtcnt >= 0)
4711 c = *fmt++;
4712 if (c == '*') {
4713 v = getnextarg(args, arglen, &argidx);
4714 if (v == NULL)
4715 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004716 if (!PyInt_Check(v)) {
4717 PyErr_SetString(
4718 PyExc_TypeError,
4719 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004720 goto error;
4721 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004722 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004723 if (prec < 0)
4724 prec = 0;
4725 if (--fmtcnt >= 0)
4726 c = *fmt++;
4727 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004728 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004729 prec = c - '0';
4730 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004731 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004732 if (!isdigit(c))
4733 break;
4734 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004735 PyErr_SetString(
4736 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004737 "prec too big");
4738 goto error;
4739 }
4740 prec = prec*10 + (c - '0');
4741 }
4742 }
4743 } /* prec */
4744 if (fmtcnt >= 0) {
4745 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004746 if (--fmtcnt >= 0)
4747 c = *fmt++;
4748 }
4749 }
4750 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004751 PyErr_SetString(PyExc_ValueError,
4752 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004753 goto error;
4754 }
4755 if (c != '%') {
4756 v = getnextarg(args, arglen, &argidx);
4757 if (v == NULL)
4758 goto error;
4759 }
4760 sign = 0;
4761 fill = ' ';
4762 switch (c) {
4763 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004764 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004765 len = 1;
4766 break;
4767 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004768#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004769 if (PyUnicode_Check(v)) {
4770 fmt = fmt_start;
4771 argidx = argidx_start;
4772 goto unicode;
4773 }
Georg Brandld45014b2005-10-01 17:06:00 +00004774#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004775 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004776#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004777 if (temp != NULL && PyUnicode_Check(temp)) {
4778 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004779 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004780 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004781 goto unicode;
4782 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004783#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004784 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004785 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004786 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004787 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004788 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004789 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004790 if (!PyString_Check(temp)) {
4791 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004792 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004793 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004794 goto error;
4795 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004796 pbuf = PyString_AS_STRING(temp);
4797 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004798 if (prec >= 0 && len > prec)
4799 len = prec;
4800 break;
4801 case 'i':
4802 case 'd':
4803 case 'u':
4804 case 'o':
4805 case 'x':
4806 case 'X':
4807 if (c == 'i')
4808 c = 'd';
Facundo Batistac11cecf2008-02-24 03:17:21 +00004809 isnumok = 0;
4810 if (PyNumber_Check(v)) {
4811 PyObject *iobj=NULL;
4812
4813 if (PyInt_Check(v) || (PyLong_Check(v))) {
4814 iobj = v;
4815 Py_INCREF(iobj);
4816 }
4817 else {
4818 iobj = PyNumber_Int(v);
4819 if (iobj==NULL) iobj = PyNumber_Long(v);
4820 }
4821 if (iobj!=NULL) {
4822 if (PyInt_Check(iobj)) {
4823 isnumok = 1;
4824 pbuf = formatbuf;
4825 len = formatint(pbuf,
4826 sizeof(formatbuf),
4827 flags, prec, c, iobj);
4828 Py_DECREF(iobj);
4829 if (len < 0)
4830 goto error;
4831 sign = 1;
4832 }
4833 else if (PyLong_Check(iobj)) {
4834 int ilen;
4835
4836 isnumok = 1;
4837 temp = _PyString_FormatLong(iobj, flags,
4838 prec, c, &pbuf, &ilen);
4839 Py_DECREF(iobj);
4840 len = ilen;
4841 if (!temp)
4842 goto error;
4843 sign = 1;
4844 }
4845 else {
4846 Py_DECREF(iobj);
4847 }
4848 }
Guido van Rossum4acdc231997-01-29 06:00:24 +00004849 }
Facundo Batistac11cecf2008-02-24 03:17:21 +00004850 if (!isnumok) {
4851 PyErr_Format(PyExc_TypeError,
4852 "%%%c format: a number is required, "
4853 "not %.200s", c, Py_TYPE(v)->tp_name);
4854 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004855 }
4856 if (flags & F_ZERO)
4857 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004858 break;
4859 case 'e':
4860 case 'E':
4861 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004862 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004863 case 'g':
4864 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004865 if (c == 'F')
4866 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004867 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004868 len = formatfloat(pbuf, sizeof(formatbuf),
4869 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004870 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004871 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004872 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004873 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004874 fill = '0';
4875 break;
4876 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004877#ifdef Py_USING_UNICODE
4878 if (PyUnicode_Check(v)) {
4879 fmt = fmt_start;
4880 argidx = argidx_start;
4881 goto unicode;
4882 }
4883#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004884 pbuf = formatbuf;
4885 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004886 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004887 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004888 break;
4889 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004890 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004891 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004892 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004893 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004894 (Py_ssize_t)(fmt - 1 -
4895 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004896 goto error;
4897 }
4898 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004899 if (*pbuf == '-' || *pbuf == '+') {
4900 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004901 len--;
4902 }
4903 else if (flags & F_SIGN)
4904 sign = '+';
4905 else if (flags & F_BLANK)
4906 sign = ' ';
4907 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004908 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004909 }
4910 if (width < len)
4911 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004912 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004913 reslen -= rescnt;
4914 rescnt = width + fmtcnt + 100;
4915 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004916 if (reslen < 0) {
4917 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004918 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004919 return PyErr_NoMemory();
4920 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004921 if (_PyString_Resize(&result, reslen) < 0) {
4922 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004923 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004924 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004925 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004926 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004927 }
4928 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004929 if (fill != ' ')
4930 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004931 rescnt--;
4932 if (width > len)
4933 width--;
4934 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004935 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4936 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004937 assert(pbuf[1] == c);
4938 if (fill != ' ') {
4939 *res++ = *pbuf++;
4940 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004941 }
Tim Petersfff53252001-04-12 18:38:48 +00004942 rescnt -= 2;
4943 width -= 2;
4944 if (width < 0)
4945 width = 0;
4946 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004947 }
4948 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004949 do {
4950 --rescnt;
4951 *res++ = fill;
4952 } while (--width > len);
4953 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004954 if (fill == ' ') {
4955 if (sign)
4956 *res++ = sign;
4957 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004958 (c == 'x' || c == 'X')) {
4959 assert(pbuf[0] == '0');
4960 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004961 *res++ = *pbuf++;
4962 *res++ = *pbuf++;
4963 }
4964 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004965 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004966 res += len;
4967 rescnt -= len;
4968 while (--width >= len) {
4969 --rescnt;
4970 *res++ = ' ';
4971 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004972 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004973 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004974 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004975 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004976 goto error;
4977 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004978 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004979 } /* '%' */
4980 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004981 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004982 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004983 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004984 goto error;
4985 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004986 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004987 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004988 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004989 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004990 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004991
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004992#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004993 unicode:
4994 if (args_owned) {
4995 Py_DECREF(args);
4996 args_owned = 0;
4997 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004998 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004999 if (PyTuple_Check(orig_args) && argidx > 0) {
5000 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00005001 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00005002 v = PyTuple_New(n);
5003 if (v == NULL)
5004 goto error;
5005 while (--n >= 0) {
5006 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5007 Py_INCREF(w);
5008 PyTuple_SET_ITEM(v, n, w);
5009 }
5010 args = v;
5011 } else {
5012 Py_INCREF(orig_args);
5013 args = orig_args;
5014 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005015 args_owned = 1;
5016 /* Take what we have of the result and let the Unicode formatting
5017 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00005018 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005019 if (_PyString_Resize(&result, rescnt))
5020 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00005021 fmtcnt = PyString_GET_SIZE(format) - \
5022 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005023 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5024 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00005025 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005026 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00005027 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005028 if (v == NULL)
5029 goto error;
5030 /* Paste what we have (result) to what the Unicode formatting
5031 function returned (v) and return the result (or error) */
5032 w = PyUnicode_Concat(result, v);
5033 Py_DECREF(result);
5034 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005035 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005036 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005037#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005038
Guido van Rossume5372401993-03-16 12:15:04 +00005039 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005040 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005041 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005042 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005043 }
Guido van Rossume5372401993-03-16 12:15:04 +00005044 return NULL;
5045}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005046
Guido van Rossum2a61e741997-01-18 07:55:05 +00005047void
Fred Drakeba096332000-07-09 07:04:36 +00005048PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005049{
5050 register PyStringObject *s = (PyStringObject *)(*p);
5051 PyObject *t;
5052 if (s == NULL || !PyString_Check(s))
5053 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005054 /* If it's a string subclass, we don't really know what putting
5055 it in the interned dict might do. */
5056 if (!PyString_CheckExact(s))
5057 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005058 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005059 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005060 if (interned == NULL) {
5061 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005062 if (interned == NULL) {
5063 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005064 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005065 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005066 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005067 t = PyDict_GetItem(interned, (PyObject *)s);
5068 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005069 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005070 Py_DECREF(*p);
5071 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005072 return;
5073 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005074
Armin Rigo79f7ad22004-08-07 19:27:39 +00005075 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005076 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005077 return;
5078 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005079 /* The two references in interned are not counted by refcnt.
5080 The string deallocator will take care of this */
Christian Heimese93237d2007-12-19 02:37:44 +00005081 Py_REFCNT(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005082 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005083}
5084
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005085void
5086PyString_InternImmortal(PyObject **p)
5087{
5088 PyString_InternInPlace(p);
5089 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5090 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5091 Py_INCREF(*p);
5092 }
5093}
5094
Guido van Rossum2a61e741997-01-18 07:55:05 +00005095
5096PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005097PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005098{
5099 PyObject *s = PyString_FromString(cp);
5100 if (s == NULL)
5101 return NULL;
5102 PyString_InternInPlace(&s);
5103 return s;
5104}
5105
Guido van Rossum8cf04761997-08-02 02:57:45 +00005106void
Fred Drakeba096332000-07-09 07:04:36 +00005107PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005108{
5109 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005110 for (i = 0; i < UCHAR_MAX + 1; i++) {
5111 Py_XDECREF(characters[i]);
5112 characters[i] = NULL;
5113 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005114 Py_XDECREF(nullstring);
5115 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005116}
Barry Warsawa903ad982001-02-23 16:40:48 +00005117
Barry Warsawa903ad982001-02-23 16:40:48 +00005118void _Py_ReleaseInternedStrings(void)
5119{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005120 PyObject *keys;
5121 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005122 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005123 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005124
5125 if (interned == NULL || !PyDict_Check(interned))
5126 return;
5127 keys = PyDict_Keys(interned);
5128 if (keys == NULL || !PyList_Check(keys)) {
5129 PyErr_Clear();
5130 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005131 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005132
5133 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5134 detector, interned strings are not forcibly deallocated; rather, we
5135 give them their stolen references back, and then clear and DECREF
5136 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005137
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005138 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005139 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5140 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005141 for (i = 0; i < n; i++) {
5142 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5143 switch (s->ob_sstate) {
5144 case SSTATE_NOT_INTERNED:
5145 /* XXX Shouldn't happen */
5146 break;
5147 case SSTATE_INTERNED_IMMORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005148 Py_REFCNT(s) += 1;
5149 immortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005150 break;
5151 case SSTATE_INTERNED_MORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005152 Py_REFCNT(s) += 2;
5153 mortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005154 break;
5155 default:
5156 Py_FatalError("Inconsistent interned string state.");
5157 }
5158 s->ob_sstate = SSTATE_NOT_INTERNED;
5159 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005160 fprintf(stderr, "total size of all interned strings: "
5161 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5162 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005163 Py_DECREF(keys);
5164 PyDict_Clear(interned);
5165 Py_DECREF(interned);
5166 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005167}