blob: ed2ffddadf4104625a0f1e2e972267981039b845 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Eric Smitha9f7d622008-02-17 19:46:49 +00007#include "formatter_string.h"
8
Guido van Rossum013142a1994-08-30 08:19:36 +00009#include <ctype.h>
10
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000011#ifdef COUNT_ALLOCS
12int null_strings, one_strings;
13#endif
14
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
26static PyObject *interned;
27
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000055PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000056{
Tim Peters9e897f42001-05-09 07:37:07 +000057 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000058 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000077 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000078 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000080 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000082 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000084 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000086 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000088 PyObject *t = (PyObject *)op;
89 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000090 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000094 PyObject *t = (PyObject *)op;
95 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000096 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000101}
102
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000103PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000104PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105{
Tim Peters62de65b2001-12-06 20:29:32 +0000106 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000107 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000108
109 assert(str != NULL);
110 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000111 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 PyErr_SetString(PyExc_OverflowError,
113 "string is too long for a Python string");
114 return NULL;
115 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 if (size == 0 && (op = nullstring) != NULL) {
117#ifdef COUNT_ALLOCS
118 null_strings++;
119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 Py_INCREF(op);
121 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000122 }
123 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
124#ifdef COUNT_ALLOCS
125 one_strings++;
126#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000127 Py_INCREF(op);
128 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000129 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000130
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000131 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000132 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000133 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000135 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000137 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000138 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000139 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000141 PyObject *t = (PyObject *)op;
142 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000143 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000145 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000147 PyObject *t = (PyObject *)op;
148 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000149 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000154}
155
Barry Warsawdadace02001-08-24 18:32:06 +0000156PyObject *
157PyString_FromFormatV(const char *format, va_list vargs)
158{
Tim Petersc15c4f12001-10-02 21:32:07 +0000159 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000160 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000161 const char* f;
162 char *s;
163 PyObject* string;
164
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000166 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#ifdef __va_copy
169 __va_copy(count, vargs);
170#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000171 count = vargs;
172#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000173#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000174 /* step 1: figure out how large a buffer we need */
175 for (f = format; *f; f++) {
176 if (*f == '%') {
177 const char* p = f;
178 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
179 ;
180
Tim Peters8931ff12006-05-13 23:28:20 +0000181 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
182 * they don't affect the amount of space we reserve.
183 */
184 if ((*f == 'l' || *f == 'z') &&
185 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000186 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000187
Barry Warsawdadace02001-08-24 18:32:06 +0000188 switch (*f) {
189 case 'c':
190 (void)va_arg(count, int);
191 /* fall through... */
192 case '%':
193 n++;
194 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000195 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000196 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000197 /* 20 bytes is enough to hold a 64-bit
198 integer. Decimal takes the most space.
199 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 n += 20;
201 break;
202 case 's':
203 s = va_arg(count, char*);
204 n += strlen(s);
205 break;
206 case 'p':
207 (void) va_arg(count, int);
208 /* maximum 64-bit pointer representation:
209 * 0xffffffffffffffff
210 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000211 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000212 */
213 n += 19;
214 break;
215 default:
216 /* if we stumble upon an unknown
217 formatting code, copy the rest of
218 the format string to the output
219 string. (we cannot just skip the
220 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000221 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000222 n += strlen(p);
223 goto expand;
224 }
225 } else
226 n++;
227 }
228 expand:
229 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000230 /* Since we've analyzed how much space we need for the worst case,
231 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 string = PyString_FromStringAndSize(NULL, n);
233 if (!string)
234 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000235
Barry Warsawdadace02001-08-24 18:32:06 +0000236 s = PyString_AsString(string);
237
238 for (f = format; *f; f++) {
239 if (*f == '%') {
240 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 Py_ssize_t i;
242 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000243 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000257 /* handle the long flag, but only for %ld and %lu.
258 others can be added when necessary. */
259 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000260 longflag = 1;
261 ++f;
262 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000264 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000265 size_tflag = 1;
266 ++f;
267 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000268
Barry Warsawdadace02001-08-24 18:32:06 +0000269 switch (*f) {
270 case 'c':
271 *s++ = va_arg(vargs, int);
272 break;
273 case 'd':
274 if (longflag)
275 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000276 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000277 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
278 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000279 else
280 sprintf(s, "%d", va_arg(vargs, int));
281 s += strlen(s);
282 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000283 case 'u':
284 if (longflag)
285 sprintf(s, "%lu",
286 va_arg(vargs, unsigned long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
289 va_arg(vargs, size_t));
290 else
291 sprintf(s, "%u",
292 va_arg(vargs, unsigned int));
293 s += strlen(s);
294 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000295 case 'i':
296 sprintf(s, "%i", va_arg(vargs, int));
297 s += strlen(s);
298 break;
299 case 'x':
300 sprintf(s, "%x", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 's':
304 p = va_arg(vargs, char*);
305 i = strlen(p);
306 if (n > 0 && i > n)
307 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000308 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000309 s += i;
310 break;
311 case 'p':
312 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000313 /* %p is ill-defined: ensure leading 0x. */
314 if (s[1] == 'X')
315 s[1] = 'x';
316 else if (s[1] != 'x') {
317 memmove(s+2, s, strlen(s)+1);
318 s[0] = '0';
319 s[1] = 'x';
320 }
Barry Warsawdadace02001-08-24 18:32:06 +0000321 s += strlen(s);
322 break;
323 case '%':
324 *s++ = '%';
325 break;
326 default:
327 strcpy(s, p);
328 s += strlen(s);
329 goto end;
330 }
331 } else
332 *s++ = *f;
333 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000336 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000337 return string;
338}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339
Barry Warsawdadace02001-08-24 18:32:06 +0000340PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000341PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000342{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000343 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000344 va_list vargs;
345
346#ifdef HAVE_STDARG_PROTOTYPES
347 va_start(vargs, format);
348#else
349 va_start(vargs);
350#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000351 ret = PyString_FromFormatV(format, vargs);
352 va_end(vargs);
353 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000354}
355
356
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000358 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 const char *encoding,
360 const char *errors)
361{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000362 PyObject *v, *str;
363
364 str = PyString_FromStringAndSize(s, size);
365 if (str == NULL)
366 return NULL;
367 v = PyString_AsDecodedString(str, encoding, errors);
368 Py_DECREF(str);
369 return v;
370}
371
372PyObject *PyString_AsDecodedObject(PyObject *str,
373 const char *encoding,
374 const char *errors)
375{
376 PyObject *v;
377
378 if (!PyString_Check(str)) {
379 PyErr_BadArgument();
380 goto onError;
381 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000382
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 if (encoding == NULL) {
384#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000385 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000386#else
387 PyErr_SetString(PyExc_ValueError, "no encoding specified");
388 goto onError;
389#endif
390 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000391
392 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 v = PyCodec_Decode(str, encoding, errors);
394 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000396
397 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000398
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000400 return NULL;
401}
402
403PyObject *PyString_AsDecodedString(PyObject *str,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v;
408
409 v = PyString_AsDecodedObject(str, encoding, errors);
410 if (v == NULL)
411 goto onError;
412
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000413#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000414 /* Convert Unicode to a string using the default encoding */
415 if (PyUnicode_Check(v)) {
416 PyObject *temp = v;
417 v = PyUnicode_AsEncodedString(v, NULL, NULL);
418 Py_DECREF(temp);
419 if (v == NULL)
420 goto onError;
421 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000423 if (!PyString_Check(v)) {
424 PyErr_Format(PyExc_TypeError,
425 "decoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000426 Py_TYPE(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427 Py_DECREF(v);
428 goto onError;
429 }
430
431 return v;
432
433 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 return NULL;
435}
436
437PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000438 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000439 const char *encoding,
440 const char *errors)
441{
442 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000443
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 str = PyString_FromStringAndSize(s, size);
445 if (str == NULL)
446 return NULL;
447 v = PyString_AsEncodedString(str, encoding, errors);
448 Py_DECREF(str);
449 return v;
450}
451
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000452PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 const char *encoding,
454 const char *errors)
455{
456 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000457
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000458 if (!PyString_Check(str)) {
459 PyErr_BadArgument();
460 goto onError;
461 }
462
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000463 if (encoding == NULL) {
464#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000465 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000466#else
467 PyErr_SetString(PyExc_ValueError, "no encoding specified");
468 goto onError;
469#endif
470 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000471
472 /* Encode via the codec registry */
473 v = PyCodec_Encode(str, encoding, errors);
474 if (v == NULL)
475 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476
477 return v;
478
479 onError:
480 return NULL;
481}
482
483PyObject *PyString_AsEncodedString(PyObject *str,
484 const char *encoding,
485 const char *errors)
486{
487 PyObject *v;
488
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000489 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490 if (v == NULL)
491 goto onError;
492
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000493#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000494 /* Convert Unicode to a string using the default encoding */
495 if (PyUnicode_Check(v)) {
496 PyObject *temp = v;
497 v = PyUnicode_AsEncodedString(v, NULL, NULL);
498 Py_DECREF(temp);
499 if (v == NULL)
500 goto onError;
501 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000502#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000503 if (!PyString_Check(v)) {
504 PyErr_Format(PyExc_TypeError,
505 "encoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000506 Py_TYPE(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000507 Py_DECREF(v);
508 goto onError;
509 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000512
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 onError:
514 return NULL;
515}
516
Guido van Rossum234f9421993-06-17 12:35:49 +0000517static void
Fred Drakeba096332000-07-09 07:04:36 +0000518string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000520 switch (PyString_CHECK_INTERNED(op)) {
521 case SSTATE_NOT_INTERNED:
522 break;
523
524 case SSTATE_INTERNED_MORTAL:
525 /* revive dead object temporarily for DelItem */
Christian Heimese93237d2007-12-19 02:37:44 +0000526 Py_REFCNT(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000527 if (PyDict_DelItem(interned, op) != 0)
528 Py_FatalError(
529 "deletion of interned string failed");
530 break;
531
532 case SSTATE_INTERNED_IMMORTAL:
533 Py_FatalError("Immortal interned string died.");
534
535 default:
536 Py_FatalError("Inconsistent interned string state.");
537 }
Christian Heimese93237d2007-12-19 02:37:44 +0000538 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000539}
540
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000541/* Unescape a backslash-escaped string. If unicode is non-zero,
542 the string is a u-literal. If recode_encoding is non-zero,
543 the string is UTF-8 encoded and should be re-encoded in the
544 specified encoding. */
545
546PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000549 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000550 const char *recode_encoding)
551{
552 int c;
553 char *p, *buf;
554 const char *end;
555 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000556 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000557 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558 if (v == NULL)
559 return NULL;
560 p = buf = PyString_AsString(v);
561 end = s + len;
562 while (s < end) {
563 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000564 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000565#ifdef Py_USING_UNICODE
566 if (recode_encoding && (*s & 0x80)) {
567 PyObject *u, *w;
568 char *r;
569 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000570 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 t = s;
572 /* Decode non-ASCII bytes as UTF-8. */
573 while (t < end && (*t & 0x80)) t++;
574 u = PyUnicode_DecodeUTF8(s, t - s, errors);
575 if(!u) goto failed;
576
577 /* Recode them in target encoding. */
578 w = PyUnicode_AsEncodedString(
579 u, recode_encoding, errors);
580 Py_DECREF(u);
581 if (!w) goto failed;
582
583 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000584 assert(PyString_Check(w));
585 r = PyString_AS_STRING(w);
586 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000587 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000588 p += rn;
589 Py_DECREF(w);
590 s = t;
591 } else {
592 *p++ = *s++;
593 }
594#else
595 *p++ = *s++;
596#endif
597 continue;
598 }
599 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000600 if (s==end) {
601 PyErr_SetString(PyExc_ValueError,
602 "Trailing \\ in string");
603 goto failed;
604 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000605 switch (*s++) {
606 /* XXX This assumes ASCII! */
607 case '\n': break;
608 case '\\': *p++ = '\\'; break;
609 case '\'': *p++ = '\''; break;
610 case '\"': *p++ = '\"'; break;
611 case 'b': *p++ = '\b'; break;
612 case 'f': *p++ = '\014'; break; /* FF */
613 case 't': *p++ = '\t'; break;
614 case 'n': *p++ = '\n'; break;
615 case 'r': *p++ = '\r'; break;
616 case 'v': *p++ = '\013'; break; /* VT */
617 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
618 case '0': case '1': case '2': case '3':
619 case '4': case '5': case '6': case '7':
620 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000621 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000623 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000624 c = (c<<3) + *s++ - '0';
625 }
626 *p++ = c;
627 break;
628 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000629 if (s+1 < end &&
630 isxdigit(Py_CHARMASK(s[0])) &&
631 isxdigit(Py_CHARMASK(s[1])))
632 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000633 unsigned int x = 0;
634 c = Py_CHARMASK(*s);
635 s++;
636 if (isdigit(c))
637 x = c - '0';
638 else if (islower(c))
639 x = 10 + c - 'a';
640 else
641 x = 10 + c - 'A';
642 x = x << 4;
643 c = Py_CHARMASK(*s);
644 s++;
645 if (isdigit(c))
646 x += c - '0';
647 else if (islower(c))
648 x += 10 + c - 'a';
649 else
650 x += 10 + c - 'A';
651 *p++ = x;
652 break;
653 }
654 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000655 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000657 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000658 }
659 if (strcmp(errors, "replace") == 0) {
660 *p++ = '?';
661 } else if (strcmp(errors, "ignore") == 0)
662 /* do nothing */;
663 else {
664 PyErr_Format(PyExc_ValueError,
665 "decoding error; "
666 "unknown error handling code: %.400s",
667 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000668 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000669 }
670#ifndef Py_USING_UNICODE
671 case 'u':
672 case 'U':
673 case 'N':
674 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000675 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 "Unicode escapes not legal "
677 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000678 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000679 }
680#endif
681 default:
682 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000683 s--;
684 goto non_esc; /* an arbitry number of unescaped
685 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 }
687 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000688 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000689 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000690 return v;
691 failed:
692 Py_DECREF(v);
693 return NULL;
694}
695
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000696/* -------------------------------------------------------------------- */
697/* object api */
698
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700string_getsize(register PyObject *op)
701{
702 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (PyString_AsStringAndSize(op, &s, &len))
705 return -1;
706 return len;
707}
708
709static /*const*/ char *
710string_getbuffer(register PyObject *op)
711{
712 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000714 if (PyString_AsStringAndSize(op, &s, &len))
715 return NULL;
716 return s;
717}
718
Martin v. Löwis18e16552006-02-15 17:27:45 +0000719Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000720PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000722 if (!PyString_Check(op))
723 return string_getsize(op);
Christian Heimese93237d2007-12-19 02:37:44 +0000724 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725}
726
727/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000728PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 if (!PyString_Check(op))
731 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000732 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000733}
734
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735int
736PyString_AsStringAndSize(register PyObject *obj,
737 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000738 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000739{
740 if (s == NULL) {
741 PyErr_BadInternalCall();
742 return -1;
743 }
744
745 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000746#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000747 if (PyUnicode_Check(obj)) {
748 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
749 if (obj == NULL)
750 return -1;
751 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000752 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000753#endif
754 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000755 PyErr_Format(PyExc_TypeError,
756 "expected string or Unicode object, "
Christian Heimese93237d2007-12-19 02:37:44 +0000757 "%.200s found", Py_TYPE(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000758 return -1;
759 }
760 }
761
762 *s = PyString_AS_STRING(obj);
763 if (len != NULL)
764 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000765 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000766 PyErr_SetString(PyExc_TypeError,
767 "expected string without null bytes");
768 return -1;
769 }
770 return 0;
771}
772
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000774/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000775
Eric Smitha9f7d622008-02-17 19:46:49 +0000776#include "stringlib/stringdefs.h"
Fredrik Lundha50d2012006-05-26 17:04:58 +0000777#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000778
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000779#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000780#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000781#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000784static int
Fred Drakeba096332000-07-09 07:04:36 +0000785string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786{
Brett Cannon01531592007-09-17 03:28:34 +0000787 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000789 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000790
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000791 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000792 if (! PyString_CheckExact(op)) {
793 int ret;
794 /* A str subclass may have its own __str__ method. */
795 op = (PyStringObject *) PyObject_Str((PyObject *)op);
796 if (op == NULL)
797 return -1;
798 ret = string_print(op, fp, flags);
799 Py_DECREF(op);
800 return ret;
801 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000802 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000803 char *data = op->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +0000804 Py_ssize_t size = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000805 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000806 while (size > INT_MAX) {
807 /* Very long strings cannot be written atomically.
808 * But don't write exactly INT_MAX bytes at a time
809 * to avoid memory aligment issues.
810 */
811 const int chunk_size = INT_MAX & ~0x3FFF;
812 fwrite(data, 1, chunk_size, fp);
813 data += chunk_size;
814 size -= chunk_size;
815 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000816#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000817 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000818#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000819 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000820#endif
Brett Cannon01531592007-09-17 03:28:34 +0000821 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000822 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824
Thomas Wouters7e474022000-07-16 12:04:32 +0000825 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826 quote = '\'';
Christian Heimese93237d2007-12-19 02:37:44 +0000827 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
828 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 quote = '"';
830
Christian Heimese93237d2007-12-19 02:37:44 +0000831 str_len = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000832 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000834 for (i = 0; i < str_len; i++) {
835 /* Since strings are immutable and the caller should have a
836 reference, accessing the interal buffer should not be an issue
837 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000840 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000841 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000842 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000843 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000845 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\r");
847 else if (c < ' ' || c >= 0x7f)
848 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000849 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000852 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000853 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000854 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855}
856
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000857PyObject *
858PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000859{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000860 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimese93237d2007-12-19 02:37:44 +0000861 size_t newsize = 2 + 4 * Py_SIZE(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000862 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +0000863 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000864 PyErr_SetString(PyExc_OverflowError,
865 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000866 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000867 }
868 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000870 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 }
872 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000873 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 register char c;
875 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000876 int quote;
877
Thomas Wouters7e474022000-07-16 12:04:32 +0000878 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000879 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000880 if (smartquotes &&
Christian Heimese93237d2007-12-19 02:37:44 +0000881 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
882 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000883 quote = '"';
884
Tim Peters9161c8b2001-12-03 01:55:38 +0000885 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000886 *p++ = quote;
Christian Heimese93237d2007-12-19 02:37:44 +0000887 for (i = 0; i < Py_SIZE(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000888 /* There's at least enough room for a hex escape
889 and a closing quote. */
890 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000892 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000894 else if (c == '\t')
895 *p++ = '\\', *p++ = 't';
896 else if (c == '\n')
897 *p++ = '\\', *p++ = 'n';
898 else if (c == '\r')
899 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000900 else if (c < ' ' || c >= 0x7f) {
901 /* For performance, we don't want to call
902 PyOS_snprintf here (extra layers of
903 function call). */
904 sprintf(p, "\\x%02x", c & 0xff);
905 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000906 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000907 else
908 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000910 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000911 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000914 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000915 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917}
918
Guido van Rossum189f1df2001-05-01 16:51:53 +0000919static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000920string_repr(PyObject *op)
921{
922 return PyString_Repr(op, 1);
923}
924
925static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000926string_str(PyObject *s)
927{
Tim Petersc9933152001-10-16 20:18:24 +0000928 assert(PyString_Check(s));
929 if (PyString_CheckExact(s)) {
930 Py_INCREF(s);
931 return s;
932 }
933 else {
934 /* Subtype -- return genuine string with the same value. */
935 PyStringObject *t = (PyStringObject *) s;
Christian Heimese93237d2007-12-19 02:37:44 +0000936 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Tim Petersc9933152001-10-16 20:18:24 +0000937 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000938}
939
Martin v. Löwis18e16552006-02-15 17:27:45 +0000940static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000941string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942{
Christian Heimese93237d2007-12-19 02:37:44 +0000943 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000944}
945
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000947string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948{
Andrew Dalke598710c2006-05-25 18:18:39 +0000949 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950 register PyStringObject *op;
951 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000952#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000953 if (PyUnicode_Check(bb))
954 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000955#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000956 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000957 "cannot concatenate 'str' and '%.200s' objects",
Christian Heimese93237d2007-12-19 02:37:44 +0000958 Py_TYPE(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000959 return NULL;
960 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000961#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000962 /* Optimize cases with empty left or right operand */
Christian Heimese93237d2007-12-19 02:37:44 +0000963 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000964 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimese93237d2007-12-19 02:37:44 +0000965 if (Py_SIZE(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000966 Py_INCREF(bb);
967 return bb;
968 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969 Py_INCREF(a);
970 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971 }
Christian Heimese93237d2007-12-19 02:37:44 +0000972 size = Py_SIZE(a) + Py_SIZE(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000973 if (size < 0) {
974 PyErr_SetString(PyExc_OverflowError,
975 "strings are too large to concat");
976 return NULL;
977 }
978
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000979 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000980 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000981 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000983 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000984 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000985 op->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimese93237d2007-12-19 02:37:44 +0000986 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
987 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000988 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990#undef b
991}
992
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000994string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000995{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000996 register Py_ssize_t i;
997 register Py_ssize_t j;
998 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001000 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001 if (n < 0)
1002 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001003 /* watch out for overflows: the size can overflow int,
1004 * and the # of bytes needed can overflow size_t
1005 */
Christian Heimese93237d2007-12-19 02:37:44 +00001006 size = Py_SIZE(a) * n;
1007 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001008 PyErr_SetString(PyExc_OverflowError,
1009 "repeated string is too long");
1010 return NULL;
1011 }
Christian Heimese93237d2007-12-19 02:37:44 +00001012 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001013 Py_INCREF(a);
1014 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001015 }
Tim Peterse7c05322004-06-27 17:24:49 +00001016 nbytes = (size_t)size;
1017 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001018 PyErr_SetString(PyExc_OverflowError,
1019 "repeated string is too long");
1020 return NULL;
1021 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001022 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001023 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001024 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001025 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001026 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001027 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001028 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001029 op->ob_sval[size] = '\0';
Christian Heimese93237d2007-12-19 02:37:44 +00001030 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001031 memset(op->ob_sval, a->ob_sval[0] , n);
1032 return (PyObject *) op;
1033 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001034 i = 0;
1035 if (i < size) {
Christian Heimese93237d2007-12-19 02:37:44 +00001036 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1037 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001038 }
1039 while (i < size) {
1040 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001041 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001042 i += j;
1043 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001044 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045}
1046
1047/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1048
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001050string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001051 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001052 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001053{
1054 if (i < 0)
1055 i = 0;
1056 if (j < 0)
1057 j = 0; /* Avoid signed/unsigned bug in next line */
Christian Heimese93237d2007-12-19 02:37:44 +00001058 if (j > Py_SIZE(a))
1059 j = Py_SIZE(a);
1060 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001061 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001062 Py_INCREF(a);
1063 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001064 }
1065 if (j < i)
1066 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001067 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001068}
1069
Guido van Rossum9284a572000-03-07 15:53:43 +00001070static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001071string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001072{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001074#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001075 if (PyUnicode_Check(sub_obj))
1076 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001077#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001078 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001079 PyErr_Format(PyExc_TypeError,
1080 "'in <string>' requires string as left operand, "
Christian Heimese93237d2007-12-19 02:37:44 +00001081 "not %.200s", Py_TYPE(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001082 return -1;
1083 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001084 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001085
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001086 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001087}
1088
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001089static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001090string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001093 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +00001094 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096 return NULL;
1097 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 pchar = a->ob_sval[i];
1099 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001100 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001101 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001102 else {
1103#ifdef COUNT_ALLOCS
1104 one_strings++;
1105#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001106 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001107 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001108 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001109}
1110
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111static PyObject*
1112string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001113{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001115 Py_ssize_t len_a, len_b;
1116 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117 PyObject *result;
1118
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001119 /* Make sure both arguments are strings. */
1120 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001121 result = Py_NotImplemented;
1122 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001123 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001124 if (a == b) {
1125 switch (op) {
1126 case Py_EQ:case Py_LE:case Py_GE:
1127 result = Py_True;
1128 goto out;
1129 case Py_NE:case Py_LT:case Py_GT:
1130 result = Py_False;
1131 goto out;
1132 }
1133 }
1134 if (op == Py_EQ) {
1135 /* Supporting Py_NE here as well does not save
1136 much time, since Py_NE is rarely used. */
Christian Heimese93237d2007-12-19 02:37:44 +00001137 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001138 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimese93237d2007-12-19 02:37:44 +00001139 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001140 result = Py_True;
1141 } else {
1142 result = Py_False;
1143 }
1144 goto out;
1145 }
Christian Heimese93237d2007-12-19 02:37:44 +00001146 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001147 min_len = (len_a < len_b) ? len_a : len_b;
1148 if (min_len > 0) {
1149 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1150 if (c==0)
1151 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001152 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001153 c = 0;
1154 if (c == 0)
1155 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1156 switch (op) {
1157 case Py_LT: c = c < 0; break;
1158 case Py_LE: c = c <= 0; break;
1159 case Py_EQ: assert(0); break; /* unreachable */
1160 case Py_NE: c = c != 0; break;
1161 case Py_GT: c = c > 0; break;
1162 case Py_GE: c = c >= 0; break;
1163 default:
1164 result = Py_NotImplemented;
1165 goto out;
1166 }
1167 result = c ? Py_True : Py_False;
1168 out:
1169 Py_INCREF(result);
1170 return result;
1171}
1172
1173int
1174_PyString_Eq(PyObject *o1, PyObject *o2)
1175{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001176 PyStringObject *a = (PyStringObject*) o1;
1177 PyStringObject *b = (PyStringObject*) o2;
Christian Heimese93237d2007-12-19 02:37:44 +00001178 return Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001179 && *a->ob_sval == *b->ob_sval
Christian Heimese93237d2007-12-19 02:37:44 +00001180 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001181}
1182
Guido van Rossum9bfef441993-03-29 10:43:31 +00001183static long
Fred Drakeba096332000-07-09 07:04:36 +00001184string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001185{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001186 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001187 register unsigned char *p;
1188 register long x;
1189
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001190 if (a->ob_shash != -1)
1191 return a->ob_shash;
Christian Heimese93237d2007-12-19 02:37:44 +00001192 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 p = (unsigned char *) a->ob_sval;
1194 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001195 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001196 x = (1000003*x) ^ *p++;
Christian Heimese93237d2007-12-19 02:37:44 +00001197 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001198 if (x == -1)
1199 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001200 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001201 return x;
1202}
1203
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204static PyObject*
1205string_subscript(PyStringObject* self, PyObject* item)
1206{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001207 if (PyIndex_Check(item)) {
1208 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 if (i == -1 && PyErr_Occurred())
1210 return NULL;
1211 if (i < 0)
1212 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001213 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214 }
1215 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001216 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 char* source_buf;
1218 char* result_buf;
1219 PyObject* result;
1220
Tim Petersae1d0c92006-03-17 03:29:34 +00001221 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001222 PyString_GET_SIZE(self),
1223 &start, &stop, &step, &slicelength) < 0) {
1224 return NULL;
1225 }
1226
1227 if (slicelength <= 0) {
1228 return PyString_FromStringAndSize("", 0);
1229 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001230 else if (start == 0 && step == 1 &&
1231 slicelength == PyString_GET_SIZE(self) &&
1232 PyString_CheckExact(self)) {
1233 Py_INCREF(self);
1234 return (PyObject *)self;
1235 }
1236 else if (step == 1) {
1237 return PyString_FromStringAndSize(
1238 PyString_AS_STRING(self) + start,
1239 slicelength);
1240 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001241 else {
1242 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001243 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001244 if (result_buf == NULL)
1245 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246
Tim Petersae1d0c92006-03-17 03:29:34 +00001247 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001248 cur += step, i++) {
1249 result_buf[i] = source_buf[cur];
1250 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001251
1252 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001253 slicelength);
1254 PyMem_Free(result_buf);
1255 return result;
1256 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001257 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001258 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001259 PyErr_Format(PyExc_TypeError,
1260 "string indices must be integers, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00001261 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001262 return NULL;
1263 }
1264}
1265
Martin v. Löwis18e16552006-02-15 17:27:45 +00001266static Py_ssize_t
1267string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268{
1269 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001270 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001271 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001272 return -1;
1273 }
1274 *ptr = (void *)self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001275 return Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001276}
1277
Martin v. Löwis18e16552006-02-15 17:27:45 +00001278static Py_ssize_t
1279string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001280{
Guido van Rossum045e6881997-09-08 18:30:11 +00001281 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001282 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001283 return -1;
1284}
1285
Martin v. Löwis18e16552006-02-15 17:27:45 +00001286static Py_ssize_t
1287string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001288{
1289 if ( lenp )
Christian Heimese93237d2007-12-19 02:37:44 +00001290 *lenp = Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001291 return 1;
1292}
1293
Martin v. Löwis18e16552006-02-15 17:27:45 +00001294static Py_ssize_t
1295string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001296{
1297 if ( index != 0 ) {
1298 PyErr_SetString(PyExc_SystemError,
1299 "accessing non-existent string segment");
1300 return -1;
1301 }
1302 *ptr = self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001303 return Py_SIZE(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001304}
1305
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001306static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001307 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001308 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001309 (ssizeargfunc)string_repeat, /*sq_repeat*/
1310 (ssizeargfunc)string_item, /*sq_item*/
1311 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001312 0, /*sq_ass_item*/
1313 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001314 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001315};
1316
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001317static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001318 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001319 (binaryfunc)string_subscript,
1320 0,
1321};
1322
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001323static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 (readbufferproc)string_buffer_getreadbuf,
1325 (writebufferproc)string_buffer_getwritebuf,
1326 (segcountproc)string_buffer_getsegcount,
1327 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001328};
1329
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330
1331
/* Strip modes; each value is also an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for a strip mode: the table entry without its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001340
Andrew Dalke525eab32006-05-26 14:00:45 +00001341
/* Non-zero when `length` bytes of `pattern` occur in `target` starting at
   `offset`.  The first and last bytes are compared directly before the
   interior is handed to memcmp.

   Don't call if length < 2: the interior length is computed as
   length-2.  Every parameter is parenthesized so that expression
   arguments expand correctly; arguments are evaluated more than once,
   so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1347
1348
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field-delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited,
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so that expression
   arguments expand correctly; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1361
/* The SPLIT_* helpers expect the enclosing function to provide locals
   named `str`, `list` and (for SPLIT_ADD) `count`, plus an `onError`
   label used for cleanup. */

/* Append the slice data[left:right] to `list` as a new string. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but the first MAX_PREALLOC results are stored
   straight into the preallocated slots; `count` tracks how many items
   have been produced. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Scanning helpers: advance (or, for the R* forms, retreat) index `i`
   over whitespace / non-whitespace bytes of `s`, stopping at `len` or
   below index 0 respectively. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1398
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001399Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001400split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401{
Skip Montanaro26015492007-12-08 15:33:24 +00001402 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001403 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001404 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001405 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406
1407 if (list == NULL)
1408 return NULL;
1409
Andrew Dalke02758d62006-05-26 15:21:01 +00001410 i = j = 0;
1411
1412 while (maxsplit-- > 0) {
1413 SKIP_SPACE(s, i, len);
1414 if (i==len) break;
1415 j = i; i++;
1416 SKIP_NONSPACE(s, i, len);
Skip Montanaro26015492007-12-08 15:33:24 +00001417 if (j == 0 && i == len && PyString_CheckExact(self)) {
1418 /* No whitespace in self, so just use it as list[0] */
1419 Py_INCREF(self);
1420 PyList_SET_ITEM(list, 0, (PyObject *)self);
1421 count++;
1422 break;
1423 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001424 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001426
1427 if (i < len) {
1428 /* Only occurs when maxsplit was reached */
1429 /* Skip any remaining whitespace and copy to end of string */
1430 SKIP_SPACE(s, i, len);
1431 if (i != len)
1432 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001433 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001434 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437 Py_DECREF(list);
1438 return NULL;
1439}
1440
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001441Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001442split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443{
Skip Montanaro26015492007-12-08 15:33:24 +00001444 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001445 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001446 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001447 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001448
1449 if (list == NULL)
1450 return NULL;
1451
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001452 i = j = 0;
1453 while ((j < len) && (maxcount-- > 0)) {
1454 for(; j<len; j++) {
1455 /* I found that using memchr makes no difference */
1456 if (s[j] == ch) {
1457 SPLIT_ADD(s, i, j);
1458 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001459 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001460 }
1461 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462 }
Skip Montanaro26015492007-12-08 15:33:24 +00001463 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1464 /* ch not in self, so just use self as list[0] */
1465 Py_INCREF(self);
1466 PyList_SET_ITEM(list, 0, (PyObject *)self);
1467 count++;
1468 }
1469 else if (i <= len) {
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001470 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001471 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001472 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001473 return list;
1474
1475 onError:
1476 Py_DECREF(list);
1477 return NULL;
1478}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001480PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481"S.split([sep [,maxsplit]]) -> list of strings\n\
1482\n\
1483Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001484delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001485splits are done. If sep is not specified or is None, any\n\
1486whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487
1488static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001489string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001491 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001492 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001494 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001495#ifdef USE_FAST
1496 Py_ssize_t pos;
1497#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498
Martin v. Löwis9c830762006-04-13 08:37:17 +00001499 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001501 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001502 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001503 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001504 return split_whitespace(self, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001505 if (PyString_Check(subobj)) {
1506 sub = PyString_AS_STRING(subobj);
1507 n = PyString_GET_SIZE(subobj);
1508 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001509#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510 else if (PyUnicode_Check(subobj))
1511 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001512#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001513 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1514 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001515
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 if (n == 0) {
1517 PyErr_SetString(PyExc_ValueError, "empty separator");
1518 return NULL;
1519 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001520 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001521 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522
Andrew Dalke525eab32006-05-26 14:00:45 +00001523 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524 if (list == NULL)
1525 return NULL;
1526
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001527#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001529 while (maxsplit-- > 0) {
1530 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1531 if (pos < 0)
1532 break;
1533 j = i+pos;
1534 SPLIT_ADD(s, i, j);
1535 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001537#else
1538 i = j = 0;
1539 while ((j+n <= len) && (maxsplit-- > 0)) {
1540 for (; j+n <= len; j++) {
1541 if (Py_STRING_MATCH(s, j, sub, n)) {
1542 SPLIT_ADD(s, i, j);
1543 i = j = j + n;
1544 break;
1545 }
1546 }
1547 }
1548#endif
1549 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001550 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551 return list;
1552
Andrew Dalke525eab32006-05-26 14:00:45 +00001553 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001554 Py_DECREF(list);
1555 return NULL;
1556}
1557
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558PyDoc_STRVAR(partition__doc__,
1559"S.partition(sep) -> (head, sep, tail)\n\
1560\n\
1561Searches for the separator sep in S, and returns the part before it,\n\
1562the separator itself, and the part after it. If the separator is not\n\
1563found, returns S and two empty strings.");
1564
1565static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001566string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001567{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001568 const char *sep;
1569 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001570
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001571 if (PyString_Check(sep_obj)) {
1572 sep = PyString_AS_STRING(sep_obj);
1573 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001574 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001575#ifdef Py_USING_UNICODE
1576 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001577 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001578#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001579 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001580 return NULL;
1581
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001582 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001583 (PyObject*) self,
1584 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1585 sep_obj, sep, sep_len
1586 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001587}
1588
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001589PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001590"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001591\n\
1592Searches for the separator sep in S, starting at the end of S, and returns\n\
1593the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001594separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001595
1596static PyObject *
1597string_rpartition(PyStringObject *self, PyObject *sep_obj)
1598{
1599 const char *sep;
1600 Py_ssize_t sep_len;
1601
1602 if (PyString_Check(sep_obj)) {
1603 sep = PyString_AS_STRING(sep_obj);
1604 sep_len = PyString_GET_SIZE(sep_obj);
1605 }
1606#ifdef Py_USING_UNICODE
1607 else if (PyUnicode_Check(sep_obj))
1608 return PyUnicode_Partition((PyObject *) self, sep_obj);
1609#endif
1610 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1611 return NULL;
1612
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001613 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001614 (PyObject*) self,
1615 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1616 sep_obj, sep, sep_len
1617 );
1618}
1619
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001620Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001621rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001622{
Skip Montanaro26015492007-12-08 15:33:24 +00001623 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001624 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001625 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001626 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001627
1628 if (list == NULL)
1629 return NULL;
1630
Andrew Dalke02758d62006-05-26 15:21:01 +00001631 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001632
Andrew Dalke02758d62006-05-26 15:21:01 +00001633 while (maxsplit-- > 0) {
1634 RSKIP_SPACE(s, i);
1635 if (i<0) break;
1636 j = i; i--;
1637 RSKIP_NONSPACE(s, i);
Skip Montanaro26015492007-12-08 15:33:24 +00001638 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1639 /* No whitespace in self, so just use it as list[0] */
1640 Py_INCREF(self);
1641 PyList_SET_ITEM(list, 0, (PyObject *)self);
1642 count++;
1643 break;
1644 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001645 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001646 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001647 if (i >= 0) {
1648 /* Only occurs when maxsplit was reached */
1649 /* Skip any remaining whitespace and copy to beginning of string */
1650 RSKIP_SPACE(s, i);
1651 if (i >= 0)
1652 SPLIT_ADD(s, 0, i + 1);
1653
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001654 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001655 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001656 if (PyList_Reverse(list) < 0)
1657 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001658 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001659 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001660 Py_DECREF(list);
1661 return NULL;
1662}
1663
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001664Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001665rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001666{
Skip Montanaro26015492007-12-08 15:33:24 +00001667 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001668 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001669 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001670 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001671
1672 if (list == NULL)
1673 return NULL;
1674
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001675 i = j = len - 1;
1676 while ((i >= 0) && (maxcount-- > 0)) {
1677 for (; i >= 0; i--) {
1678 if (s[i] == ch) {
1679 SPLIT_ADD(s, i + 1, j + 1);
1680 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001681 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001682 }
1683 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001684 }
Skip Montanaro26015492007-12-08 15:33:24 +00001685 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1686 /* ch not in self, so just use self as list[0] */
1687 Py_INCREF(self);
1688 PyList_SET_ITEM(list, 0, (PyObject *)self);
1689 count++;
1690 }
1691 else if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001692 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001693 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001694 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001695 if (PyList_Reverse(list) < 0)
1696 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001697 return list;
1698
1699 onError:
1700 Py_DECREF(list);
1701 return NULL;
1702}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001703
1704PyDoc_STRVAR(rsplit__doc__,
1705"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1706\n\
1707Return a list of the words in the string S, using sep as the\n\
1708delimiter string, starting at the end of the string and working\n\
1709to the front. If maxsplit is given, at most maxsplit splits are\n\
1710done. If sep is not specified or is None, any whitespace string\n\
1711is a separator.");
1712
1713static PyObject *
1714string_rsplit(PyStringObject *self, PyObject *args)
1715{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001716 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001717 Py_ssize_t maxsplit = -1, count=0;
Skip Montanaro26015492007-12-08 15:33:24 +00001718 const char *s, *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001719 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720
Martin v. Löwis9c830762006-04-13 08:37:17 +00001721 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 return NULL;
1723 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001724 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001725 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001726 return rsplit_whitespace(self, len, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001727 if (PyString_Check(subobj)) {
1728 sub = PyString_AS_STRING(subobj);
1729 n = PyString_GET_SIZE(subobj);
1730 }
1731#ifdef Py_USING_UNICODE
1732 else if (PyUnicode_Check(subobj))
1733 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1734#endif
1735 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1736 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001737
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001738 if (n == 0) {
1739 PyErr_SetString(PyExc_ValueError, "empty separator");
1740 return NULL;
1741 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001742 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001743 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001744
Andrew Dalke525eab32006-05-26 14:00:45 +00001745 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001746 if (list == NULL)
1747 return NULL;
1748
1749 j = len;
1750 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001751
Skip Montanaro26015492007-12-08 15:33:24 +00001752 s = PyString_AS_STRING(self);
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001753 while ( (i >= 0) && (maxsplit-- > 0) ) {
1754 for (; i>=0; i--) {
1755 if (Py_STRING_MATCH(s, i, sub, n)) {
1756 SPLIT_ADD(s, i + n, j);
1757 j = i;
1758 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001759 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001760 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001761 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001762 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001763 SPLIT_ADD(s, 0, j);
1764 FIX_PREALLOC_SIZE(list);
1765 if (PyList_Reverse(list) < 0)
1766 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001767 return list;
1768
Andrew Dalke525eab32006-05-26 14:00:45 +00001769onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001770 Py_DECREF(list);
1771 return NULL;
1772}
1773
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776"S.join(sequence) -> string\n\
1777\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001778Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001779sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780
1781static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001782string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783{
1784 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001785 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001788 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001789 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001790 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001791 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792
Tim Peters19fe14e2001-01-19 03:03:47 +00001793 seq = PySequence_Fast(orig, "");
1794 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001795 return NULL;
1796 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001797
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001798 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 if (seqlen == 0) {
1800 Py_DECREF(seq);
1801 return PyString_FromString("");
1802 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001804 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001805 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1806 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001807 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001808 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001809 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001811
Raymond Hettinger674f2412004-08-23 23:23:54 +00001812 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001813 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001814 * Do a pre-pass to figure out the total amount of space we'll
1815 * need (sz), see whether any argument is absurd, and defer to
1816 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001817 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001818 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001819 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001820 item = PySequence_Fast_GET_ITEM(seq, i);
1821 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001822#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001823 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001824 /* Defer to Unicode join.
1825 * CAUTION: There's no gurantee that the
1826 * original sequence can be iterated over
1827 * again, so we must pass seq here.
1828 */
1829 PyObject *result;
1830 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001831 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001832 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001833 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001834#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001835 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001836 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001837 " %.80s found",
Christian Heimese93237d2007-12-19 02:37:44 +00001838 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001839 Py_DECREF(seq);
1840 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001841 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001842 sz += PyString_GET_SIZE(item);
1843 if (i != 0)
1844 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001845 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001846 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001847 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001848 Py_DECREF(seq);
1849 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001851 }
1852
1853 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001854 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001855 if (res == NULL) {
1856 Py_DECREF(seq);
1857 return NULL;
1858 }
1859
1860 /* Catenate everything. */
1861 p = PyString_AS_STRING(res);
1862 for (i = 0; i < seqlen; ++i) {
1863 size_t n;
1864 item = PySequence_Fast_GET_ITEM(seq, i);
1865 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001866 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001867 p += n;
1868 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001869 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001870 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001871 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001872 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001873
Jeremy Hylton49048292000-07-11 03:28:17 +00001874 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876}
1877
Tim Peters52e155e2001-06-16 05:42:57 +00001878PyObject *
1879_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001880{
Tim Petersa7259592001-06-16 05:11:17 +00001881 assert(sep != NULL && PyString_Check(sep));
1882 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001883 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001884}
1885
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001886Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001887string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001888{
1889 if (*end > len)
1890 *end = len;
1891 else if (*end < 0)
1892 *end += len;
1893 if (*end < 0)
1894 *end = 0;
1895 if (*start < 0)
1896 *start += len;
1897 if (*start < 0)
1898 *start = 0;
1899}
1900
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001901Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001902string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001904 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001905 const char *sub;
1906 Py_ssize_t sub_len;
1907 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Facundo Batista57d56692007-11-16 18:04:14 +00001908 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909
Facundo Batista57d56692007-11-16 18:04:14 +00001910 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1911 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001912 return -2;
Facundo Batista57d56692007-11-16 18:04:14 +00001913 /* To support None in "start" and "end" arguments, meaning
1914 the same as if they were not passed.
1915 */
1916 if (obj_start != Py_None)
1917 if (!_PyEval_SliceIndex(obj_start, &start))
1918 return -2;
1919 if (obj_end != Py_None)
1920 if (!_PyEval_SliceIndex(obj_end, &end))
1921 return -2;
1922
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 if (PyString_Check(subobj)) {
1924 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001925 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001926 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001927#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001929 return PyUnicode_Find(
1930 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001931#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001932 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001933 /* XXX - the "expected a character buffer object" is pretty
1934 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935 return -2;
1936
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001937 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001938 return stringlib_find_slice(
1939 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1940 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001941 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001942 return stringlib_rfind_slice(
1943 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1944 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949"S.find(sub [,start [,end]]) -> int\n\
1950\n\
1951Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001952such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953arguments start and end are interpreted as in slice notation.\n\
1954\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001955Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956
1957static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001958string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001960 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961 if (result == -2)
1962 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001963 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964}
1965
1966
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001967PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968"S.index(sub [,start [,end]]) -> int\n\
1969\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001970Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971
1972static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001973string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001975 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976 if (result == -2)
1977 return NULL;
1978 if (result == -1) {
1979 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001980 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981 return NULL;
1982 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001983 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984}
1985
1986
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001987PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988"S.rfind(sub [,start [,end]]) -> int\n\
1989\n\
1990Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001991such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992arguments start and end are interpreted as in slice notation.\n\
1993\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001994Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995
1996static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001997string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001999 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000 if (result == -2)
2001 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002002 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003}
2004
2005
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002006PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007"S.rindex(sub [,start [,end]]) -> int\n\
2008\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002009Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010
2011static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002012string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002014 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015 if (result == -2)
2016 return NULL;
2017 if (result == -1) {
2018 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002019 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020 return NULL;
2021 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002022 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023}
2024
2025
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002026Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002027do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2028{
2029 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002030 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002031 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002032 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2033 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002034
2035 i = 0;
2036 if (striptype != RIGHTSTRIP) {
2037 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2038 i++;
2039 }
2040 }
2041
2042 j = len;
2043 if (striptype != LEFTSTRIP) {
2044 do {
2045 j--;
2046 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2047 j++;
2048 }
2049
2050 if (i == 0 && j == len && PyString_CheckExact(self)) {
2051 Py_INCREF(self);
2052 return (PyObject*)self;
2053 }
2054 else
2055 return PyString_FromStringAndSize(s+i, j-i);
2056}
2057
2058
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002059Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002060do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061{
2062 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002063 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065 i = 0;
2066 if (striptype != RIGHTSTRIP) {
2067 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2068 i++;
2069 }
2070 }
2071
2072 j = len;
2073 if (striptype != LEFTSTRIP) {
2074 do {
2075 j--;
2076 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2077 j++;
2078 }
2079
Tim Peters8fa5dd02001-09-12 02:18:30 +00002080 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081 Py_INCREF(self);
2082 return (PyObject*)self;
2083 }
2084 else
2085 return PyString_FromStringAndSize(s+i, j-i);
2086}
2087
2088
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002089Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002090do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2091{
2092 PyObject *sep = NULL;
2093
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002094 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002095 return NULL;
2096
2097 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002098 if (PyString_Check(sep))
2099 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002100#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002101 else if (PyUnicode_Check(sep)) {
2102 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2103 PyObject *res;
2104 if (uniself==NULL)
2105 return NULL;
2106 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2107 striptype, sep);
2108 Py_DECREF(uniself);
2109 return res;
2110 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002111#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002112 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002113#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002114 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002115#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002116 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002117#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002118 STRIPNAME(striptype));
2119 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002120 }
2121
2122 return do_strip(self, striptype);
2123}
2124
2125
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002126PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002127"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128\n\
2129Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002130whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002131If chars is given and not None, remove characters in chars instead.\n\
2132If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133
2134static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002135string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002137 if (PyTuple_GET_SIZE(args) == 0)
2138 return do_strip(self, BOTHSTRIP); /* Common case */
2139 else
2140 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141}
2142
2143
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002144PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002145"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002147Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002148If chars is given and not None, remove characters in chars instead.\n\
2149If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150
2151static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002152string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002154 if (PyTuple_GET_SIZE(args) == 0)
2155 return do_strip(self, LEFTSTRIP); /* Common case */
2156 else
2157 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158}
2159
2160
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002161PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002162"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002164Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002165If chars is given and not None, remove characters in chars instead.\n\
2166If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167
2168static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002169string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002171 if (PyTuple_GET_SIZE(args) == 0)
2172 return do_strip(self, RIGHTSTRIP); /* Common case */
2173 else
2174 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175}
2176
2177
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002178PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179"S.lower() -> string\n\
2180\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002181Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002183/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2184#ifndef _tolower
2185#define _tolower tolower
2186#endif
2187
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002189string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002191 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002192 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002193 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002195 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002196 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002198
2199 s = PyString_AS_STRING(newobj);
2200
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002201 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002202
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002204 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002205 if (isupper(c))
2206 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002208
Anthony Baxtera6286212006-04-11 07:42:36 +00002209 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210}
2211
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213"S.upper() -> string\n\
2214\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002215Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002216
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002217#ifndef _toupper
2218#define _toupper toupper
2219#endif
2220
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002222string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002224 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002225 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002228 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002229 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002231
2232 s = PyString_AS_STRING(newobj);
2233
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002234 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002235
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002237 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002238 if (islower(c))
2239 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002241
Anthony Baxtera6286212006-04-11 07:42:36 +00002242 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243}
2244
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002245PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246"S.title() -> string\n\
2247\n\
2248Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002249characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002250
2251static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002252string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253{
2254 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002255 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002257 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258
Anthony Baxtera6286212006-04-11 07:42:36 +00002259 newobj = PyString_FromStringAndSize(NULL, n);
2260 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002262 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263 for (i = 0; i < n; i++) {
2264 int c = Py_CHARMASK(*s++);
2265 if (islower(c)) {
2266 if (!previous_is_cased)
2267 c = toupper(c);
2268 previous_is_cased = 1;
2269 } else if (isupper(c)) {
2270 if (previous_is_cased)
2271 c = tolower(c);
2272 previous_is_cased = 1;
2273 } else
2274 previous_is_cased = 0;
2275 *s_new++ = c;
2276 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002277 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278}
2279
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002280PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281"S.capitalize() -> string\n\
2282\n\
2283Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002284capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002285
2286static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002287string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288{
2289 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002290 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002291 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292
Anthony Baxtera6286212006-04-11 07:42:36 +00002293 newobj = PyString_FromStringAndSize(NULL, n);
2294 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002296 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297 if (0 < n) {
2298 int c = Py_CHARMASK(*s++);
2299 if (islower(c))
2300 *s_new = toupper(c);
2301 else
2302 *s_new = c;
2303 s_new++;
2304 }
2305 for (i = 1; i < n; i++) {
2306 int c = Py_CHARMASK(*s++);
2307 if (isupper(c))
2308 *s_new = tolower(c);
2309 else
2310 *s_new = c;
2311 s_new++;
2312 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002313 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314}
2315
2316
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002317PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318"S.count(sub[, start[, end]]) -> int\n\
2319\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002320Return the number of non-overlapping occurrences of substring sub in\n\
2321string S[start:end]. Optional arguments start and end are interpreted\n\
2322as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323
2324static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002325string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002327 PyObject *sub_obj;
2328 const char *str = PyString_AS_STRING(self), *sub;
2329 Py_ssize_t sub_len;
2330 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002332 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2333 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002335
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002336 if (PyString_Check(sub_obj)) {
2337 sub = PyString_AS_STRING(sub_obj);
2338 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002340#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002341 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002342 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002343 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002344 if (count == -1)
2345 return NULL;
2346 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002347 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002348 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002349#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002350 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 return NULL;
2352
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002353 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002354
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002355 return PyInt_FromSsize_t(
2356 stringlib_count(str + start, end - start, sub, sub_len)
2357 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358}
2359
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002360PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361"S.swapcase() -> string\n\
2362\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002363Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002364converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365
2366static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002367string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368{
2369 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002370 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002371 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372
Anthony Baxtera6286212006-04-11 07:42:36 +00002373 newobj = PyString_FromStringAndSize(NULL, n);
2374 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002375 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002376 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377 for (i = 0; i < n; i++) {
2378 int c = Py_CHARMASK(*s++);
2379 if (islower(c)) {
2380 *s_new = toupper(c);
2381 }
2382 else if (isupper(c)) {
2383 *s_new = tolower(c);
2384 }
2385 else
2386 *s_new = c;
2387 s_new++;
2388 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002389 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390}
2391
2392
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002393PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394"S.translate(table [,deletechars]) -> string\n\
2395\n\
2396Return a copy of the string S, where all characters occurring\n\
2397in the optional argument deletechars are removed, and the\n\
2398remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002399translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400
2401static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002402string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002405 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002406 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002408 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002409 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 PyObject *result;
2411 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002414 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417
2418 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002419 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002420 tablen = PyString_GET_SIZE(tableobj);
2421 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002422 else if (tableobj == Py_None) {
2423 table = NULL;
2424 tablen = 256;
2425 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002426#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002428 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 parameter; instead a mapping to None will cause characters
2430 to be deleted. */
2431 if (delobj != NULL) {
2432 PyErr_SetString(PyExc_TypeError,
2433 "deletions are implemented differently for unicode");
2434 return NULL;
2435 }
2436 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2437 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002438#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002439 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002441
Martin v. Löwis00b61272002-12-12 20:03:19 +00002442 if (tablen != 256) {
2443 PyErr_SetString(PyExc_ValueError,
2444 "translation table must be 256 characters long");
2445 return NULL;
2446 }
2447
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 if (delobj != NULL) {
2449 if (PyString_Check(delobj)) {
2450 del_table = PyString_AS_STRING(delobj);
2451 dellen = PyString_GET_SIZE(delobj);
2452 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002453#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454 else if (PyUnicode_Check(delobj)) {
2455 PyErr_SetString(PyExc_TypeError,
2456 "deletions are implemented differently for unicode");
2457 return NULL;
2458 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002459#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2461 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002462 }
2463 else {
2464 del_table = NULL;
2465 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002466 }
2467
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002468 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469 result = PyString_FromStringAndSize((char *)NULL, inlen);
2470 if (result == NULL)
2471 return NULL;
2472 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002473 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002474
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002475 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476 /* If no deletions are required, use faster code */
2477 for (i = inlen; --i >= 0; ) {
2478 c = Py_CHARMASK(*input++);
2479 if (Py_CHARMASK((*output++ = table[c])) != c)
2480 changed = 1;
2481 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002482 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483 return result;
2484 Py_DECREF(result);
2485 Py_INCREF(input_obj);
2486 return input_obj;
2487 }
2488
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002489 if (table == NULL) {
2490 for (i = 0; i < 256; i++)
2491 trans_table[i] = Py_CHARMASK(i);
2492 } else {
2493 for (i = 0; i < 256; i++)
2494 trans_table[i] = Py_CHARMASK(table[i]);
2495 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002496
2497 for (i = 0; i < dellen; i++)
2498 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2499
2500 for (i = inlen; --i >= 0; ) {
2501 c = Py_CHARMASK(*input++);
2502 if (trans_table[c] != -1)
2503 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2504 continue;
2505 changed = 1;
2506 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002507 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508 Py_DECREF(result);
2509 Py_INCREF(input_obj);
2510 return input_obj;
2511 }
2512 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002513 if (inlen > 0)
2514 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002515 return result;
2516}
2517
2518
/* Search direction constants.  Presumably intended as the `direction`
   argument of findstring()/countstring(), which scan backwards for a
   negative value and forwards otherwise -- confirm against callers.
   REVERSE is parenthesized so it expands safely inside any expression. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a char* to the first byte equal to c within the first target_len
   bytes of target, or NULL if there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2526
2527/* String ops must return a string. */
2528/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002529Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002530return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002531{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002532 if (PyString_CheckExact(self)) {
2533 Py_INCREF(self);
2534 return self;
2535 }
2536 return (PyStringObject *)PyString_FromStringAndSize(
2537 PyString_AS_STRING(self),
2538 PyString_GET_SIZE(self));
2539}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002540
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002541Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002542countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002543{
2544 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002545 const char *start=target;
2546 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002548 while ( (start=findchar(start, end-start, c)) != NULL ) {
2549 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002550 if (count >= maxcount)
2551 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002552 start += 1;
2553 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002554 return count;
2555}
2556
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002557Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002558findstring(const char *target, Py_ssize_t target_len,
2559 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002560 Py_ssize_t start,
2561 Py_ssize_t end,
2562 int direction)
2563{
2564 if (start < 0) {
2565 start += target_len;
2566 if (start < 0)
2567 start = 0;
2568 }
2569 if (end > target_len) {
2570 end = target_len;
2571 } else if (end < 0) {
2572 end += target_len;
2573 if (end < 0)
2574 end = 0;
2575 }
2576
2577 /* zero-length substrings always match at the first attempt */
2578 if (pattern_len == 0)
2579 return (direction > 0) ? start : end;
2580
2581 end -= pattern_len;
2582
2583 if (direction < 0) {
2584 for (; end >= start; end--)
2585 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2586 return end;
2587 } else {
2588 for (; start <= end; start++)
2589 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2590 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002591 }
2592 return -1;
2593}
2594
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002595Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002596countstring(const char *target, Py_ssize_t target_len,
2597 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002598 Py_ssize_t start,
2599 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002600 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002601{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002602 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002603
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002604 if (start < 0) {
2605 start += target_len;
2606 if (start < 0)
2607 start = 0;
2608 }
2609 if (end > target_len) {
2610 end = target_len;
2611 } else if (end < 0) {
2612 end += target_len;
2613 if (end < 0)
2614 end = 0;
2615 }
2616
2617 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002618 if (pattern_len == 0 || maxcount == 0) {
2619 if (target_len+1 < maxcount)
2620 return target_len+1;
2621 return maxcount;
2622 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002623
2624 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002625 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002626 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002627 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2628 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002629 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002630 end -= pattern_len-1;
2631 }
2632 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002633 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002634 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2635 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002636 if (--maxcount <= 0)
2637 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002638 start += pattern_len-1;
2639 }
2640 }
2641 return count;
2642}
2643
2644
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002645/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002646
2647/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002648Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002649replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002650 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002651 Py_ssize_t maxcount)
2652{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002653 char *self_s, *result_s;
2654 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002655 Py_ssize_t count, i, product;
2656 PyStringObject *result;
2657
2658 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002659
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002660 /* 1 at the end plus 1 after every character */
2661 count = self_len+1;
2662 if (maxcount < count)
2663 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002664
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002665 /* Check for overflow */
2666 /* result_len = count * to_len + self_len; */
2667 product = count * to_len;
2668 if (product / to_len != count) {
2669 PyErr_SetString(PyExc_OverflowError,
2670 "replace string is too long");
2671 return NULL;
2672 }
2673 result_len = product + self_len;
2674 if (result_len < 0) {
2675 PyErr_SetString(PyExc_OverflowError,
2676 "replace string is too long");
2677 return NULL;
2678 }
2679
2680 if (! (result = (PyStringObject *)
2681 PyString_FromStringAndSize(NULL, result_len)) )
2682 return NULL;
2683
2684 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002685 result_s = PyString_AS_STRING(result);
2686
2687 /* TODO: special case single character, which doesn't need memcpy */
2688
2689 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002690 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002691 result_s += to_len;
2692 count -= 1;
2693
2694 for (i=0; i<count; i++) {
2695 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002696 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002697 result_s += to_len;
2698 }
2699
2700 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002701 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002702
2703 return result;
2704}
2705
2706/* Special case for deleting a single character */
2707/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002708Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002709replace_delete_single_character(PyStringObject *self,
2710 char from_c, Py_ssize_t maxcount)
2711{
2712 char *self_s, *result_s;
2713 char *start, *next, *end;
2714 Py_ssize_t self_len, result_len;
2715 Py_ssize_t count;
2716 PyStringObject *result;
2717
2718 self_len = PyString_GET_SIZE(self);
2719 self_s = PyString_AS_STRING(self);
2720
Andrew Dalke51324072006-05-26 20:25:22 +00002721 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002722 if (count == 0) {
2723 return return_self(self);
2724 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002725
2726 result_len = self_len - count; /* from_len == 1 */
2727 assert(result_len>=0);
2728
2729 if ( (result = (PyStringObject *)
2730 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2731 return NULL;
2732 result_s = PyString_AS_STRING(result);
2733
2734 start = self_s;
2735 end = self_s + self_len;
2736 while (count-- > 0) {
2737 next = findchar(start, end-start, from_c);
2738 if (next == NULL)
2739 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002740 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002741 result_s += (next-start);
2742 start = next+1;
2743 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002744 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002745
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002746 return result;
2747}
2748
2749/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2750
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002751Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002752replace_delete_substring(PyStringObject *self,
2753 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002755 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002756 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002757 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 Py_ssize_t count, offset;
2759 PyStringObject *result;
2760
2761 self_len = PyString_GET_SIZE(self);
2762 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002763
2764 count = countstring(self_s, self_len,
2765 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002766 0, self_len, 1,
2767 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002768
2769 if (count == 0) {
2770 /* no matches */
2771 return return_self(self);
2772 }
2773
2774 result_len = self_len - (count * from_len);
2775 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002776
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002777 if ( (result = (PyStringObject *)
2778 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2779 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002780
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002781 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002782
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783 start = self_s;
2784 end = self_s + self_len;
2785 while (count-- > 0) {
2786 offset = findstring(start, end-start,
2787 from_s, from_len,
2788 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002789 if (offset == -1)
2790 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002792
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002793 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002794
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002795 result_s += (next-start);
2796 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002797 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002798 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002800}
2801
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002802/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002803Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002804replace_single_character_in_place(PyStringObject *self,
2805 char from_c, char to_c,
2806 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002807{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808 char *self_s, *result_s, *start, *end, *next;
2809 Py_ssize_t self_len;
2810 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002811
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812 /* The result string will be the same size */
2813 self_s = PyString_AS_STRING(self);
2814 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002815
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002817
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002818 if (next == NULL) {
2819 /* No matches; return the original string */
2820 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002821 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002822
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002823 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002824 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002825 if (result == NULL)
2826 return NULL;
2827 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002828 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002829
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002830 /* change everything in-place, starting with this one */
2831 start = result_s + (next-self_s);
2832 *start = to_c;
2833 start++;
2834 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002835
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 while (--maxcount > 0) {
2837 next = findchar(start, end-start, from_c);
2838 if (next == NULL)
2839 break;
2840 *next = to_c;
2841 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002842 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002843
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002844 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002845}
2846
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002848Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002849replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002850 const char *from_s, Py_ssize_t from_len,
2851 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002852 Py_ssize_t maxcount)
2853{
2854 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002855 char *self_s;
2856 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002857 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002858
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002859 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002860
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002861 self_s = PyString_AS_STRING(self);
2862 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002863
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864 offset = findstring(self_s, self_len,
2865 from_s, from_len,
2866 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 if (offset == -1) {
2868 /* No matches; return the original string */
2869 return return_self(self);
2870 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002871
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002873 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 if (result == NULL)
2875 return NULL;
2876 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002877 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002878
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002879 /* change everything in-place, starting with this one */
2880 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002881 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002882 start += from_len;
2883 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002884
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002885 while ( --maxcount > 0) {
2886 offset = findstring(start, end-start,
2887 from_s, from_len,
2888 0, end-start, FORWARD);
2889 if (offset==-1)
2890 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002891 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002892 start += offset+from_len;
2893 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002894
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895 return result;
2896}
2897
2898/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002899Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002900replace_single_character(PyStringObject *self,
2901 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002902 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903 Py_ssize_t maxcount)
2904{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002905 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002907 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 Py_ssize_t count, product;
2909 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002910
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002911 self_s = PyString_AS_STRING(self);
2912 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002913
Andrew Dalke51324072006-05-26 20:25:22 +00002914 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002915 if (count == 0) {
2916 /* no matches, return unchanged */
2917 return return_self(self);
2918 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002919
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002920 /* use the difference between current and new, hence the "-1" */
2921 /* result_len = self_len + count * (to_len-1) */
2922 product = count * (to_len-1);
2923 if (product / (to_len-1) != count) {
2924 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2925 return NULL;
2926 }
2927 result_len = self_len + product;
2928 if (result_len < 0) {
2929 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2930 return NULL;
2931 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002932
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002933 if ( (result = (PyStringObject *)
2934 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2935 return NULL;
2936 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002937
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002938 start = self_s;
2939 end = self_s + self_len;
2940 while (count-- > 0) {
2941 next = findchar(start, end-start, from_c);
2942 if (next == NULL)
2943 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002944
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002945 if (next == start) {
2946 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002947 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002948 result_s += to_len;
2949 start += 1;
2950 } else {
2951 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002952 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002953 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002954 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002955 result_s += to_len;
2956 start = next+1;
2957 }
2958 }
2959 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002960 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002961
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002962 return result;
2963}
2964
2965/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002966Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002967replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002968 const char *from_s, Py_ssize_t from_len,
2969 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002970 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002971 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002972 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002973 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002974 Py_ssize_t count, offset, product;
2975 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002976
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002977 self_s = PyString_AS_STRING(self);
2978 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002979
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002980 count = countstring(self_s, self_len,
2981 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002982 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002983 if (count == 0) {
2984 /* no matches, return unchanged */
2985 return return_self(self);
2986 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002987
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002988 /* Check for overflow */
2989 /* result_len = self_len + count * (to_len-from_len) */
2990 product = count * (to_len-from_len);
2991 if (product / (to_len-from_len) != count) {
2992 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2993 return NULL;
2994 }
2995 result_len = self_len + product;
2996 if (result_len < 0) {
2997 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2998 return NULL;
2999 }
Neal Norwitza7edb112006-07-30 06:59:13 +00003000
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003001 if ( (result = (PyStringObject *)
3002 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3003 return NULL;
3004 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00003005
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003006 start = self_s;
3007 end = self_s + self_len;
3008 while (count-- > 0) {
3009 offset = findstring(start, end-start,
3010 from_s, from_len,
3011 0, end-start, FORWARD);
3012 if (offset == -1)
3013 break;
3014 next = start+offset;
3015 if (next == start) {
3016 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003017 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003018 result_s += to_len;
3019 start += from_len;
3020 } else {
3021 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003022 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003023 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003024 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003025 result_s += to_len;
3026 start = next+from_len;
3027 }
3028 }
3029 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003030 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00003031
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003032 return result;
3033}
3034
3035
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003036Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003037replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003038 const char *from_s, Py_ssize_t from_len,
3039 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003040 Py_ssize_t maxcount)
3041{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003042 if (maxcount < 0) {
3043 maxcount = PY_SSIZE_T_MAX;
3044 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3045 /* nothing to do; return the original string */
3046 return return_self(self);
3047 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003048
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003049 if (maxcount == 0 ||
3050 (from_len == 0 && to_len == 0)) {
3051 /* nothing to do; return the original string */
3052 return return_self(self);
3053 }
3054
3055 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003056
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 if (from_len == 0) {
3058 /* insert the 'to' string everywhere. */
3059 /* >>> "Python".replace("", ".") */
3060 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003061 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003062 }
3063
3064 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3065 /* point for an empty self string to generate a non-empty string */
3066 /* Special case so the remaining code always gets a non-empty string */
3067 if (PyString_GET_SIZE(self) == 0) {
3068 return return_self(self);
3069 }
3070
3071 if (to_len == 0) {
3072 /* delete all occurances of 'from' string */
3073 if (from_len == 1) {
3074 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003075 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003076 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003077 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078 }
3079 }
3080
3081 /* Handle special case where both strings have the same length */
3082
3083 if (from_len == to_len) {
3084 if (from_len == 1) {
3085 return replace_single_character_in_place(
3086 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003087 from_s[0],
3088 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003089 maxcount);
3090 } else {
3091 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003092 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003093 }
3094 }
3095
3096 /* Otherwise use the more generic algorithms */
3097 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003098 return replace_single_character(self, from_s[0],
3099 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003100 } else {
3101 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003102 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003103 }
3104}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003105
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003106PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003107"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003108\n\
3109Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003110old replaced by new. If the optional argument count is\n\
3111given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003112
3113static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003114string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003115{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003116 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003117 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003118 const char *from_s, *to_s;
3119 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003120
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003121 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003122 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003124 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003125 from_s = PyString_AS_STRING(from);
3126 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003127 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003128#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003129 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003130 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003131 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003132#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003133 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003134 return NULL;
3135
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003136 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003137 to_s = PyString_AS_STRING(to);
3138 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003139 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003140#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003141 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003142 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003143 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003144#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003145 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146 return NULL;
3147
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003148 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003149 from_s, from_len,
3150 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003151}
3152
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003153/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003154
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003155/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003156 * against substr, using the start and end arguments. Returns
3157 * -1 on error, 0 if not found and 1 if found.
3158 */
3159Py_LOCAL(int)
3160_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3161 Py_ssize_t end, int direction)
3162{
3163 Py_ssize_t len = PyString_GET_SIZE(self);
3164 Py_ssize_t slen;
3165 const char* sub;
3166 const char* str;
3167
3168 if (PyString_Check(substr)) {
3169 sub = PyString_AS_STRING(substr);
3170 slen = PyString_GET_SIZE(substr);
3171 }
3172#ifdef Py_USING_UNICODE
3173 else if (PyUnicode_Check(substr))
3174 return PyUnicode_Tailmatch((PyObject *)self,
3175 substr, start, end, direction);
3176#endif
3177 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3178 return -1;
3179 str = PyString_AS_STRING(self);
3180
3181 string_adjust_indices(&start, &end, len);
3182
3183 if (direction < 0) {
3184 /* startswith */
3185 if (start+slen > len)
3186 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003187 } else {
3188 /* endswith */
3189 if (end-start < slen || start > len)
3190 return 0;
3191
3192 if (end-slen > start)
3193 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003194 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003195 if (end-start >= slen)
3196 return ! memcmp(str+start, sub, slen);
3197 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003198}
3199
3200
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003201PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003202"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003203\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003204Return True if S starts with the specified prefix, False otherwise.\n\
3205With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003206With optional end, stop comparing S at that position.\n\
3207prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003208
3209static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003210string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003211{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003212 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003213 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003214 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003215 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216
Guido van Rossumc6821402000-05-08 14:08:05 +00003217 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3218 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003219 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003220 if (PyTuple_Check(subobj)) {
3221 Py_ssize_t i;
3222 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3223 result = _string_tailmatch(self,
3224 PyTuple_GET_ITEM(subobj, i),
3225 start, end, -1);
3226 if (result == -1)
3227 return NULL;
3228 else if (result) {
3229 Py_RETURN_TRUE;
3230 }
3231 }
3232 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233 }
Georg Brandl24250812006-06-09 18:45:48 +00003234 result = _string_tailmatch(self, subobj, start, end, -1);
3235 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003236 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003237 else
Georg Brandl24250812006-06-09 18:45:48 +00003238 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003239}
3240
3241
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003242PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003243"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003244\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003245Return True if S ends with the specified suffix, False otherwise.\n\
3246With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003247With optional end, stop comparing S at that position.\n\
3248suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003249
3250static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003251string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003252{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003253 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003254 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003255 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003256 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003257
Guido van Rossumc6821402000-05-08 14:08:05 +00003258 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3259 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003260 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003261 if (PyTuple_Check(subobj)) {
3262 Py_ssize_t i;
3263 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3264 result = _string_tailmatch(self,
3265 PyTuple_GET_ITEM(subobj, i),
3266 start, end, +1);
3267 if (result == -1)
3268 return NULL;
3269 else if (result) {
3270 Py_RETURN_TRUE;
3271 }
3272 }
3273 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003274 }
Georg Brandl24250812006-06-09 18:45:48 +00003275 result = _string_tailmatch(self, subobj, start, end, +1);
3276 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003277 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003278 else
Georg Brandl24250812006-06-09 18:45:48 +00003279 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003280}
3281
3282
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003283PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003284"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003285\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003286Encodes S using the codec registered for encoding. encoding defaults\n\
3287to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003288handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003289a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3290'xmlcharrefreplace' as well as any other name registered with\n\
3291codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003292
3293static PyObject *
3294string_encode(PyStringObject *self, PyObject *args)
3295{
3296 char *encoding = NULL;
3297 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003298 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003299
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003300 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3301 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003302 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003303 if (v == NULL)
3304 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003305 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3306 PyErr_Format(PyExc_TypeError,
3307 "encoder did not return a string/unicode object "
3308 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003309 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003310 Py_DECREF(v);
3311 return NULL;
3312 }
3313 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003314
3315 onError:
3316 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003317}
3318
3319
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003320PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003321"S.decode([encoding[,errors]]) -> object\n\
3322\n\
3323Decodes S using the codec registered for encoding. encoding defaults\n\
3324to the default encoding. errors may be given to set a different error\n\
3325handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003326a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3327as well as any other name registerd with codecs.register_error that is\n\
3328able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003329
3330static PyObject *
3331string_decode(PyStringObject *self, PyObject *args)
3332{
3333 char *encoding = NULL;
3334 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003335 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003336
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003337 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3338 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003339 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003340 if (v == NULL)
3341 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003342 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3343 PyErr_Format(PyExc_TypeError,
3344 "decoder did not return a string/unicode object "
3345 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003346 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003347 Py_DECREF(v);
3348 return NULL;
3349 }
3350 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003351
3352 onError:
3353 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003354}
3355
3356
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003357PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003358"S.expandtabs([tabsize]) -> string\n\
3359\n\
3360Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003361If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003362
3363static PyObject*
3364string_expandtabs(PyStringObject *self, PyObject *args)
3365{
Guido van Rossum5bdff602008-03-11 21:18:06 +00003366 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003367 char *q;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003368 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003369 PyObject *u;
3370 int tabsize = 8;
3371
3372 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3373 return NULL;
3374
Thomas Wouters7e474022000-07-16 12:04:32 +00003375 /* First pass: determine size of output string */
Guido van Rossum5bdff602008-03-11 21:18:06 +00003376 i = 0; /* chars up to and including most recent \n or \r */
3377 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3378 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003379 for (p = PyString_AS_STRING(self); p < e; p++)
3380 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003381 if (tabsize > 0) {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003382 incr = tabsize - (j % tabsize);
3383 if (j > PY_SSIZE_T_MAX - incr)
3384 goto overflow1;
3385 j += incr;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003386 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 }
3388 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003389 if (j > PY_SSIZE_T_MAX - 1)
3390 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003391 j++;
3392 if (*p == '\n' || *p == '\r') {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003393 if (i > PY_SSIZE_T_MAX - j)
3394 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395 i += j;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003396 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003397 }
3398 }
3399
Guido van Rossum5bdff602008-03-11 21:18:06 +00003400 if (i > PY_SSIZE_T_MAX - j)
3401 goto overflow1;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003402
Guido van Rossum4c08d552000-03-10 22:55:18 +00003403 /* Second pass: create output string and fill it */
3404 u = PyString_FromStringAndSize(NULL, i + j);
3405 if (!u)
3406 return NULL;
3407
Guido van Rossum5bdff602008-03-11 21:18:06 +00003408 j = 0; /* same as in first pass */
3409 q = PyString_AS_STRING(u); /* next output char */
3410 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411
3412 for (p = PyString_AS_STRING(self); p < e; p++)
3413 if (*p == '\t') {
3414 if (tabsize > 0) {
3415 i = tabsize - (j % tabsize);
3416 j += i;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003417 while (i--) {
3418 if (q >= qe)
3419 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003420 *q++ = ' ';
Guido van Rossum5bdff602008-03-11 21:18:06 +00003421 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422 }
3423 }
3424 else {
Guido van Rossum5bdff602008-03-11 21:18:06 +00003425 if (q >= qe)
3426 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003427 *q++ = *p;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003428 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003429 if (*p == '\n' || *p == '\r')
3430 j = 0;
3431 }
3432
3433 return u;
Guido van Rossum5bdff602008-03-11 21:18:06 +00003434
3435 overflow2:
3436 Py_DECREF(u);
3437 overflow1:
3438 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3439 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440}
3441
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003442Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003443pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444{
3445 PyObject *u;
3446
3447 if (left < 0)
3448 left = 0;
3449 if (right < 0)
3450 right = 0;
3451
Tim Peters8fa5dd02001-09-12 02:18:30 +00003452 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003453 Py_INCREF(self);
3454 return (PyObject *)self;
3455 }
3456
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003457 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458 left + PyString_GET_SIZE(self) + right);
3459 if (u) {
3460 if (left)
3461 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003462 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003463 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003464 PyString_GET_SIZE(self));
3465 if (right)
3466 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3467 fill, right);
3468 }
3469
3470 return u;
3471}
3472
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003473PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003474"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003475"\n"
3476"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003477"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003478
3479static PyObject *
3480string_ljust(PyStringObject *self, PyObject *args)
3481{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003482 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003483 char fillchar = ' ';
3484
Thomas Wouters4abb3662006-04-19 14:50:15 +00003485 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003486 return NULL;
3487
Tim Peters8fa5dd02001-09-12 02:18:30 +00003488 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003489 Py_INCREF(self);
3490 return (PyObject*) self;
3491 }
3492
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003493 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003494}
3495
3496
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003497PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003498"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003499"\n"
3500"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003501"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502
3503static PyObject *
3504string_rjust(PyStringObject *self, PyObject *args)
3505{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003506 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003507 char fillchar = ' ';
3508
Thomas Wouters4abb3662006-04-19 14:50:15 +00003509 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003510 return NULL;
3511
Tim Peters8fa5dd02001-09-12 02:18:30 +00003512 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003513 Py_INCREF(self);
3514 return (PyObject*) self;
3515 }
3516
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003517 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518}
3519
3520
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003521PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003522"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003523"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003524"Return S centered in a string of length width. Padding is\n"
3525"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003526
3527static PyObject *
3528string_center(PyStringObject *self, PyObject *args)
3529{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003530 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003531 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003532 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003533
Thomas Wouters4abb3662006-04-19 14:50:15 +00003534 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003535 return NULL;
3536
Tim Peters8fa5dd02001-09-12 02:18:30 +00003537 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003538 Py_INCREF(self);
3539 return (PyObject*) self;
3540 }
3541
3542 marg = width - PyString_GET_SIZE(self);
3543 left = marg / 2 + (marg & width & 1);
3544
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003545 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546}
3547
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003548PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003549"S.zfill(width) -> string\n"
3550"\n"
3551"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003552"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003553
3554static PyObject *
3555string_zfill(PyStringObject *self, PyObject *args)
3556{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003557 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003558 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003559 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003560 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003561
Thomas Wouters4abb3662006-04-19 14:50:15 +00003562 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003563 return NULL;
3564
3565 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003566 if (PyString_CheckExact(self)) {
3567 Py_INCREF(self);
3568 return (PyObject*) self;
3569 }
3570 else
3571 return PyString_FromStringAndSize(
3572 PyString_AS_STRING(self),
3573 PyString_GET_SIZE(self)
3574 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003575 }
3576
3577 fill = width - PyString_GET_SIZE(self);
3578
3579 s = pad(self, fill, 0, '0');
3580
3581 if (s == NULL)
3582 return NULL;
3583
3584 p = PyString_AS_STRING(s);
3585 if (p[fill] == '+' || p[fill] == '-') {
3586 /* move sign to beginning of string */
3587 p[0] = p[fill];
3588 p[fill] = '0';
3589 }
3590
3591 return (PyObject*) s;
3592}
3593
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003594PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003595"S.isspace() -> bool\n\
3596\n\
3597Return True if all characters in S are whitespace\n\
3598and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003599
3600static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003601string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003602{
Fred Drakeba096332000-07-09 07:04:36 +00003603 register const unsigned char *p
3604 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003605 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003606
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607 /* Shortcut for single character strings */
3608 if (PyString_GET_SIZE(self) == 1 &&
3609 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003611
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003612 /* Special case for empty strings */
3613 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003615
Guido van Rossum4c08d552000-03-10 22:55:18 +00003616 e = p + PyString_GET_SIZE(self);
3617 for (; p < e; p++) {
3618 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622}
3623
3624
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003625PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003626"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003627\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003628Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003629and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003630
3631static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003632string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003633{
Fred Drakeba096332000-07-09 07:04:36 +00003634 register const unsigned char *p
3635 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003636 register const unsigned char *e;
3637
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003638 /* Shortcut for single character strings */
3639 if (PyString_GET_SIZE(self) == 1 &&
3640 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003642
3643 /* Special case for empty strings */
3644 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003645 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003646
3647 e = p + PyString_GET_SIZE(self);
3648 for (; p < e; p++) {
3649 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003651 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003652 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003653}
3654
3655
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003656PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003658\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003659Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003660and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003661
3662static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003663string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003664{
Fred Drakeba096332000-07-09 07:04:36 +00003665 register const unsigned char *p
3666 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003667 register const unsigned char *e;
3668
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003669 /* Shortcut for single character strings */
3670 if (PyString_GET_SIZE(self) == 1 &&
3671 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003673
3674 /* Special case for empty strings */
3675 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003677
3678 e = p + PyString_GET_SIZE(self);
3679 for (; p < e; p++) {
3680 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003682 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003683 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003684}
3685
3686
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003687PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003690Return True if all characters in S are digits\n\
3691and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692
3693static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003694string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695{
Fred Drakeba096332000-07-09 07:04:36 +00003696 register const unsigned char *p
3697 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003698 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700 /* Shortcut for single character strings */
3701 if (PyString_GET_SIZE(self) == 1 &&
3702 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003705 /* Special case for empty strings */
3706 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003707 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003708
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709 e = p + PyString_GET_SIZE(self);
3710 for (; p < e; p++) {
3711 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003712 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003713 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003714 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715}
3716
3717
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003718PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003719"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003721Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003722at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003723
3724static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003725string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726{
Fred Drakeba096332000-07-09 07:04:36 +00003727 register const unsigned char *p
3728 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003729 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730 int cased;
3731
Guido van Rossum4c08d552000-03-10 22:55:18 +00003732 /* Shortcut for single character strings */
3733 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003734 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003736 /* Special case for empty strings */
3737 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003738 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003739
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740 e = p + PyString_GET_SIZE(self);
3741 cased = 0;
3742 for (; p < e; p++) {
3743 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003744 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745 else if (!cased && islower(*p))
3746 cased = 1;
3747 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003748 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003749}
3750
3751
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003752PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003753"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003755Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003756at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757
3758static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003759string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760{
Fred Drakeba096332000-07-09 07:04:36 +00003761 register const unsigned char *p
3762 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003763 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764 int cased;
3765
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766 /* Shortcut for single character strings */
3767 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003768 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003770 /* Special case for empty strings */
3771 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003772 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003773
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774 e = p + PyString_GET_SIZE(self);
3775 cased = 0;
3776 for (; p < e; p++) {
3777 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003778 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779 else if (!cased && isupper(*p))
3780 cased = 1;
3781 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003782 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783}
3784
3785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003786PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003787"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003788\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003789Return True if S is a titlecased string and there is at least one\n\
3790character in S, i.e. uppercase characters may only follow uncased\n\
3791characters and lowercase characters only cased ones. Return False\n\
3792otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003793
3794static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003795string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796{
Fred Drakeba096332000-07-09 07:04:36 +00003797 register const unsigned char *p
3798 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003799 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 int cased, previous_is_cased;
3801
Guido van Rossum4c08d552000-03-10 22:55:18 +00003802 /* Shortcut for single character strings */
3803 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003804 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003806 /* Special case for empty strings */
3807 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003808 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003809
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 e = p + PyString_GET_SIZE(self);
3811 cased = 0;
3812 previous_is_cased = 0;
3813 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003814 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003815
3816 if (isupper(ch)) {
3817 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003818 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003819 previous_is_cased = 1;
3820 cased = 1;
3821 }
3822 else if (islower(ch)) {
3823 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003824 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003825 previous_is_cased = 1;
3826 cased = 1;
3827 }
3828 else
3829 previous_is_cased = 0;
3830 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003831 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003832}
3833
3834
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003835PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003836"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003837\n\
3838Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003839Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003840is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003841
Guido van Rossum4c08d552000-03-10 22:55:18 +00003842static PyObject*
3843string_splitlines(PyStringObject *self, PyObject *args)
3844{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003845 register Py_ssize_t i;
3846 register Py_ssize_t j;
3847 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003848 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003849 PyObject *list;
3850 PyObject *str;
3851 char *data;
3852
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003853 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003854 return NULL;
3855
3856 data = PyString_AS_STRING(self);
3857 len = PyString_GET_SIZE(self);
3858
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003859 /* This does not use the preallocated list because splitlines is
3860 usually run with hundreds of newlines. The overhead of
3861 switching between PyList_SET_ITEM and append causes about a
3862 2-3% slowdown for that common case. A smarter implementation
3863 could move the if check out, so the SET_ITEMs are done first
3864 and the appends only done when the prealloc buffer is full.
3865 That's too much work for little gain.*/
3866
Guido van Rossum4c08d552000-03-10 22:55:18 +00003867 list = PyList_New(0);
3868 if (!list)
3869 goto onError;
3870
3871 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003872 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003873
Guido van Rossum4c08d552000-03-10 22:55:18 +00003874 /* Find a line and append it */
3875 while (i < len && data[i] != '\n' && data[i] != '\r')
3876 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003877
3878 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003879 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003880 if (i < len) {
3881 if (data[i] == '\r' && i + 1 < len &&
3882 data[i+1] == '\n')
3883 i += 2;
3884 else
3885 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003886 if (keepends)
3887 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003888 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003889 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003890 j = i;
3891 }
3892 if (j < len) {
3893 SPLIT_APPEND(data, j, len);
3894 }
3895
3896 return list;
3897
3898 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003899 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003900 return NULL;
3901}
3902
/* Retire the split helper macros; SPLIT_APPEND's last use is in
   string_splitlines() above. */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003907
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003908static PyObject *
3909string_getnewargs(PyStringObject *v)
3910{
Christian Heimese93237d2007-12-19 02:37:44 +00003911 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003912}
3913
Eric Smitha9f7d622008-02-17 19:46:49 +00003914
3915#include "stringlib/string_format.h"
3916
3917PyDoc_STRVAR(format__doc__,
3918"S.format(*args, **kwargs) -> unicode\n\
3919\n\
3920");
3921
3922PyDoc_STRVAR(p_format__doc__,
3923"S.__format__(format_spec) -> unicode\n\
3924\n\
3925");
3926
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003927
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003928static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003929string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003930 /* Counterparts of the obsolete stropmodule functions; except
3931 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003932 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3933 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003934 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003935 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3936 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003937 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3938 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3939 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3940 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3941 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3942 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3943 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003944 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3945 capitalize__doc__},
3946 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3947 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3948 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003949 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003950 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3951 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3952 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3953 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3954 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3955 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3956 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003957 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3958 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003959 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3960 startswith__doc__},
3961 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3962 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3963 swapcase__doc__},
3964 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3965 translate__doc__},
3966 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3967 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3968 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3969 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3970 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Eric Smitha9f7d622008-02-17 19:46:49 +00003971 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3972 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3973 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3974 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003975 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3976 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3977 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3978 expandtabs__doc__},
3979 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3980 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003981 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003982 {NULL, NULL} /* sentinel */
3983};
3984
Jeremy Hylton938ace62002-07-17 16:30:39 +00003985static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003986str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3987
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003988static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003989string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003990{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003991 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003992 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003993
Guido van Rossumae960af2001-08-30 03:11:59 +00003994 if (type != &PyString_Type)
3995 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003996 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3997 return NULL;
3998 if (x == NULL)
3999 return PyString_FromString("");
4000 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004001}
4002
Guido van Rossumae960af2001-08-30 03:11:59 +00004003static PyObject *
4004str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4005{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004006 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004007 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004008
4009 assert(PyType_IsSubtype(type, &PyString_Type));
4010 tmp = string_new(&PyString_Type, args, kwds);
4011 if (tmp == NULL)
4012 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004013 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004014 n = PyString_GET_SIZE(tmp);
4015 pnew = type->tp_alloc(type, n);
4016 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004017 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004018 ((PyStringObject *)pnew)->ob_shash =
4019 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004020 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004021 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004022 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004023 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004024}
4025
Guido van Rossumcacfc072002-05-24 19:01:59 +00004026static PyObject *
4027basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4028{
4029 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004030 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004031 return NULL;
4032}
4033
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004034static PyObject *
4035string_mod(PyObject *v, PyObject *w)
4036{
4037 if (!PyString_Check(v)) {
4038 Py_INCREF(Py_NotImplemented);
4039 return Py_NotImplemented;
4040 }
4041 return PyString_Format(v, w);
4042}
4043
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004044PyDoc_STRVAR(basestring_doc,
4045"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004046
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004047static PyNumberMethods string_as_number = {
4048 0, /*nb_add*/
4049 0, /*nb_subtract*/
4050 0, /*nb_multiply*/
4051 0, /*nb_divide*/
4052 string_mod, /*nb_remainder*/
4053};
4054
4055
Guido van Rossumcacfc072002-05-24 19:01:59 +00004056PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004057 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004058 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004059 0,
4060 0,
4061 0, /* tp_dealloc */
4062 0, /* tp_print */
4063 0, /* tp_getattr */
4064 0, /* tp_setattr */
4065 0, /* tp_compare */
4066 0, /* tp_repr */
4067 0, /* tp_as_number */
4068 0, /* tp_as_sequence */
4069 0, /* tp_as_mapping */
4070 0, /* tp_hash */
4071 0, /* tp_call */
4072 0, /* tp_str */
4073 0, /* tp_getattro */
4074 0, /* tp_setattro */
4075 0, /* tp_as_buffer */
4076 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4077 basestring_doc, /* tp_doc */
4078 0, /* tp_traverse */
4079 0, /* tp_clear */
4080 0, /* tp_richcompare */
4081 0, /* tp_weaklistoffset */
4082 0, /* tp_iter */
4083 0, /* tp_iternext */
4084 0, /* tp_methods */
4085 0, /* tp_members */
4086 0, /* tp_getset */
4087 &PyBaseObject_Type, /* tp_base */
4088 0, /* tp_dict */
4089 0, /* tp_descr_get */
4090 0, /* tp_descr_set */
4091 0, /* tp_dictoffset */
4092 0, /* tp_init */
4093 0, /* tp_alloc */
4094 basestring_new, /* tp_new */
4095 0, /* tp_free */
4096};
4097
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004098PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004099"str(object) -> string\n\
4100\n\
4101Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004102If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004103
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004104PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004105 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004106 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004107 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004108 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004109 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004110 (printfunc)string_print, /* tp_print */
4111 0, /* tp_getattr */
4112 0, /* tp_setattr */
4113 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004114 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004115 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004116 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004117 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004118 (hashfunc)string_hash, /* tp_hash */
4119 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004120 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004121 PyObject_GenericGetAttr, /* tp_getattro */
4122 0, /* tp_setattro */
4123 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004124 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004125 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004126 string_doc, /* tp_doc */
4127 0, /* tp_traverse */
4128 0, /* tp_clear */
4129 (richcmpfunc)string_richcompare, /* tp_richcompare */
4130 0, /* tp_weaklistoffset */
4131 0, /* tp_iter */
4132 0, /* tp_iternext */
4133 string_methods, /* tp_methods */
4134 0, /* tp_members */
4135 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004136 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004137 0, /* tp_dict */
4138 0, /* tp_descr_get */
4139 0, /* tp_descr_set */
4140 0, /* tp_dictoffset */
4141 0, /* tp_init */
4142 0, /* tp_alloc */
4143 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004144 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004145};
4146
4147void
Fred Drakeba096332000-07-09 07:04:36 +00004148PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004149{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004150 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004151 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004152 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004153 if (w == NULL || !PyString_Check(*pv)) {
4154 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004155 *pv = NULL;
4156 return;
4157 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004158 v = string_concat((PyStringObject *) *pv, w);
4159 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004160 *pv = v;
4161}
4162
Guido van Rossum013142a1994-08-30 08:19:36 +00004163void
Fred Drakeba096332000-07-09 07:04:36 +00004164PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004165{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004166 PyString_Concat(pv, w);
4167 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004168}
4169
4170
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004171/* The following function breaks the notion that strings are immutable:
4172 it changes the size of a string. We get away with this only if there
4173 is only one module referencing the object. You can also think of it
4174 as creating a new string object and destroying the old one, only
4175 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004176 already be known to some other part of the code...
4177 Note that if there's not enough memory to resize the string, the original
4178 string object at *pv is deallocated, *pv is set to NULL, an "out of
4179 memory" exception is set, and -1 is returned. Else (on success) 0 is
4180 returned, and the value in *pv may or may not be the same as on input.
4181 As always, an extra byte is allocated for a trailing \0 byte (newsize
4182 does *not* include that), and a trailing \0 byte is stored.
4183*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004184
4185int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004186_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004187{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004188 register PyObject *v;
4189 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004190 v = *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004191 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004192 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004193 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004194 Py_DECREF(v);
4195 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004196 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004197 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004198 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004199 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004200 _Py_ForgetReference(v);
4201 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004202 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004203 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004204 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004205 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004206 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004207 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004208 _Py_NewReference(*pv);
4209 sv = (PyStringObject *) *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004210 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004211 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004212 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004213 return 0;
4214}
Guido van Rossume5372401993-03-16 12:15:04 +00004215
4216/* Helpers for formatstring */
4217
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004218Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004219getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004220{
Thomas Wouters977485d2006-02-16 15:59:12 +00004221 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004222 if (argidx < arglen) {
4223 (*p_argidx)++;
4224 if (arglen < 0)
4225 return args;
4226 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004227 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004228 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004229 PyErr_SetString(PyExc_TypeError,
4230 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004231 return NULL;
4232}
4233
/* Format codes: flag bits and the format-string character for each.
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)
4246
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004247Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004248formatfloat(char *buf, size_t buflen, int flags,
4249 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004250{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004251 /* fmt = '%#.' + `prec` + `type`
4252 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004253 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004254 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004255 x = PyFloat_AsDouble(v);
4256 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004257 PyErr_Format(PyExc_TypeError, "float argument required, "
Christian Heimese93237d2007-12-19 02:37:44 +00004258 "not %.200s", Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004259 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004260 }
Guido van Rossume5372401993-03-16 12:15:04 +00004261 if (prec < 0)
4262 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004263 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4264 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004265 /* Worst case length calc to ensure no buffer overrun:
4266
4267 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004268 fmt = %#.<prec>g
4269 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004270 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004271 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004272
4273 'f' formats:
4274 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4275 len = 1 + 50 + 1 + prec = 52 + prec
4276
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004277 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004278 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004279
4280 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004281 if (((type == 'g' || type == 'G') &&
4282 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004283 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004284 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004285 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004286 return -1;
4287 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004288 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4289 (flags&F_ALT) ? "#" : "",
4290 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004291 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004292 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004293}
4294
Tim Peters38fd5b62000-09-21 05:43:11 +00004295/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4296 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4297 * Python's regular ints.
4298 * Return value: a new PyString*, or NULL if error.
4299 * . *pbuf is set to point into it,
4300 * *plen set to the # of chars following that.
4301 * Caller must decref it when done using pbuf.
4302 * The string starting at *pbuf is of the form
4303 * "-"? ("0x" | "0X")? digit+
4304 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004305 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004306 * There will be at least prec digits, zero-filled on the left if
4307 * necessary to get that many.
4308 * val object to be converted
4309 * flags bitmask of format flags; only F_ALT is looked at
4310 * prec minimum number of digits; 0-fill on left if needed
4311 * type a character in [duoxX]; u acts the same as d
4312 *
4313 * CAUTION: o, x and X conversions on regular ints can never
4314 * produce a '-' sign, but can for Python's unbounded ints.
4315 */
4316PyObject*
4317_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4318 char **pbuf, int *plen)
4319{
4320 PyObject *result = NULL;
4321 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004322 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004323 int sign; /* 1 if '-', else 0 */
4324 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004325 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004326 int numdigits; /* len == numnondigits + numdigits */
4327 int numnondigits = 0;
4328
4329 switch (type) {
4330 case 'd':
4331 case 'u':
Christian Heimese93237d2007-12-19 02:37:44 +00004332 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004333 break;
4334 case 'o':
Christian Heimese93237d2007-12-19 02:37:44 +00004335 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004336 break;
4337 case 'x':
4338 case 'X':
4339 numnondigits = 2;
Christian Heimese93237d2007-12-19 02:37:44 +00004340 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004341 break;
4342 default:
4343 assert(!"'type' not in [duoxX]");
4344 }
4345 if (!result)
4346 return NULL;
4347
Neal Norwitz56423e52006-08-13 18:11:08 +00004348 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004349 if (!buf) {
4350 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004351 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004352 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004353
Tim Peters38fd5b62000-09-21 05:43:11 +00004354 /* To modify the string in-place, there can only be one reference. */
Christian Heimese93237d2007-12-19 02:37:44 +00004355 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004356 PyErr_BadInternalCall();
4357 return NULL;
4358 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004359 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004360 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004361 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4362 return NULL;
4363 }
4364 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004365 if (buf[len-1] == 'L') {
4366 --len;
4367 buf[len] = '\0';
4368 }
4369 sign = buf[0] == '-';
4370 numnondigits += sign;
4371 numdigits = len - numnondigits;
4372 assert(numdigits > 0);
4373
Tim Petersfff53252001-04-12 18:38:48 +00004374 /* Get rid of base marker unless F_ALT */
4375 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004376 /* Need to skip 0x, 0X or 0. */
4377 int skipped = 0;
4378 switch (type) {
4379 case 'o':
4380 assert(buf[sign] == '0');
4381 /* If 0 is only digit, leave it alone. */
4382 if (numdigits > 1) {
4383 skipped = 1;
4384 --numdigits;
4385 }
4386 break;
4387 case 'x':
4388 case 'X':
4389 assert(buf[sign] == '0');
4390 assert(buf[sign + 1] == 'x');
4391 skipped = 2;
4392 numnondigits -= 2;
4393 break;
4394 }
4395 if (skipped) {
4396 buf += skipped;
4397 len -= skipped;
4398 if (sign)
4399 buf[0] = '-';
4400 }
4401 assert(len == numnondigits + numdigits);
4402 assert(numdigits > 0);
4403 }
4404
4405 /* Fill with leading zeroes to meet minimum width. */
4406 if (prec > numdigits) {
4407 PyObject *r1 = PyString_FromStringAndSize(NULL,
4408 numnondigits + prec);
4409 char *b1;
4410 if (!r1) {
4411 Py_DECREF(result);
4412 return NULL;
4413 }
4414 b1 = PyString_AS_STRING(r1);
4415 for (i = 0; i < numnondigits; ++i)
4416 *b1++ = *buf++;
4417 for (i = 0; i < prec - numdigits; i++)
4418 *b1++ = '0';
4419 for (i = 0; i < numdigits; i++)
4420 *b1++ = *buf++;
4421 *b1 = '\0';
4422 Py_DECREF(result);
4423 result = r1;
4424 buf = PyString_AS_STRING(result);
4425 len = numnondigits + prec;
4426 }
4427
4428 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004429 if (type == 'X') {
4430 /* Need to convert all lower case letters to upper case.
4431 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004432 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004433 if (buf[i] >= 'a' && buf[i] <= 'x')
4434 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004435 }
4436 *pbuf = buf;
4437 *plen = len;
4438 return result;
4439}
4440
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004441Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004442formatint(char *buf, size_t buflen, int flags,
4443 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004444{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004445 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004446 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4447 + 1 + 1 = 24 */
4448 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004449 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004450 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004451
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004452 x = PyInt_AsLong(v);
4453 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004454 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00004455 Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004456 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004457 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004458 if (x < 0 && type == 'u') {
4459 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004460 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004461 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4462 sign = "-";
4463 else
4464 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004465 if (prec < 0)
4466 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004467
4468 if ((flags & F_ALT) &&
4469 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004470 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004471 * of issues that cause pain:
4472 * - when 0 is being converted, the C standard leaves off
4473 * the '0x' or '0X', which is inconsistent with other
4474 * %#x/%#X conversions and inconsistent with Python's
4475 * hex() function
4476 * - there are platforms that violate the standard and
4477 * convert 0 with the '0x' or '0X'
4478 * (Metrowerks, Compaq Tru64)
4479 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004480 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004481 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004482 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004483 * We can achieve the desired consistency by inserting our
4484 * own '0x' or '0X' prefix, and substituting %x/%X in place
4485 * of %#x/%#X.
4486 *
4487 * Note that this is the same approach as used in
4488 * formatint() in unicodeobject.c
4489 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004490 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4491 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004492 }
4493 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004494 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4495 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004496 prec, type);
4497 }
4498
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004499 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4500 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004501 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004502 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004503 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004504 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004505 return -1;
4506 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004507 if (sign[0])
4508 PyOS_snprintf(buf, buflen, fmt, -x);
4509 else
4510 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004511 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004512}
4513
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004514Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004515formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004516{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004517 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004518 if (PyString_Check(v)) {
4519 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004520 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004521 }
4522 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004523 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004524 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004525 }
4526 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004527 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004528}
4529
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does its own bounds checking so that the buffer cannot
   overflow, but a better solution may be to malloc a buffer of
   appropriate size for each format.  For now, the current solution is
   sufficient.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004539
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004540PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004541PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004542{
4543 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004544 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004545 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004546 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004547 PyObject *result, *orig_args;
4548#ifdef Py_USING_UNICODE
4549 PyObject *v, *w;
4550#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004551 PyObject *dict = NULL;
4552 if (format == NULL || !PyString_Check(format) || args == NULL) {
4553 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004554 return NULL;
4555 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004556 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004557 fmt = PyString_AS_STRING(format);
4558 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004559 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004560 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004561 if (result == NULL)
4562 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004563 res = PyString_AsString(result);
4564 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004565 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004566 argidx = 0;
4567 }
4568 else {
4569 arglen = -1;
4570 argidx = -2;
4571 }
Christian Heimese93237d2007-12-19 02:37:44 +00004572 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004573 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004574 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004575 while (--fmtcnt >= 0) {
4576 if (*fmt != '%') {
4577 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004578 rescnt = fmtcnt + 100;
4579 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004580 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004581 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004582 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004583 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004584 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004585 }
4586 *res++ = *fmt++;
4587 }
4588 else {
4589 /* Got a format specifier */
4590 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004591 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004592 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004593 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004594 int fill;
Facundo Batistac11cecf2008-02-24 03:17:21 +00004595 int isnumok;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004596 PyObject *v = NULL;
4597 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004598 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004599 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004600 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004601 char formatbuf[FORMATBUFLEN];
4602 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004603#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004604 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004605 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004606#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004607
Guido van Rossumda9c2711996-12-05 21:58:58 +00004608 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004609 if (*fmt == '(') {
4610 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004611 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004612 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004613 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004614
4615 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004616 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004617 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004618 goto error;
4619 }
4620 ++fmt;
4621 --fmtcnt;
4622 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004623 /* Skip over balanced parentheses */
4624 while (pcount > 0 && --fmtcnt >= 0) {
4625 if (*fmt == ')')
4626 --pcount;
4627 else if (*fmt == '(')
4628 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004629 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004630 }
4631 keylen = fmt - keystart - 1;
4632 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004634 "incomplete format key");
4635 goto error;
4636 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004637 key = PyString_FromStringAndSize(keystart,
4638 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004639 if (key == NULL)
4640 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004641 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004642 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004643 args_owned = 0;
4644 }
4645 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004646 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004647 if (args == NULL) {
4648 goto error;
4649 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004650 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004651 arglen = -1;
4652 argidx = -2;
4653 }
Guido van Rossume5372401993-03-16 12:15:04 +00004654 while (--fmtcnt >= 0) {
4655 switch (c = *fmt++) {
4656 case '-': flags |= F_LJUST; continue;
4657 case '+': flags |= F_SIGN; continue;
4658 case ' ': flags |= F_BLANK; continue;
4659 case '#': flags |= F_ALT; continue;
4660 case '0': flags |= F_ZERO; continue;
4661 }
4662 break;
4663 }
4664 if (c == '*') {
4665 v = getnextarg(args, arglen, &argidx);
4666 if (v == NULL)
4667 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004668 if (!PyInt_Check(v)) {
4669 PyErr_SetString(PyExc_TypeError,
4670 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004671 goto error;
4672 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004673 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004674 if (width < 0) {
4675 flags |= F_LJUST;
4676 width = -width;
4677 }
Guido van Rossume5372401993-03-16 12:15:04 +00004678 if (--fmtcnt >= 0)
4679 c = *fmt++;
4680 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004681 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004682 width = c - '0';
4683 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004684 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004685 if (!isdigit(c))
4686 break;
4687 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004688 PyErr_SetString(
4689 PyExc_ValueError,
4690 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004691 goto error;
4692 }
4693 width = width*10 + (c - '0');
4694 }
4695 }
4696 if (c == '.') {
4697 prec = 0;
4698 if (--fmtcnt >= 0)
4699 c = *fmt++;
4700 if (c == '*') {
4701 v = getnextarg(args, arglen, &argidx);
4702 if (v == NULL)
4703 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004704 if (!PyInt_Check(v)) {
4705 PyErr_SetString(
4706 PyExc_TypeError,
4707 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004708 goto error;
4709 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004710 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004711 if (prec < 0)
4712 prec = 0;
4713 if (--fmtcnt >= 0)
4714 c = *fmt++;
4715 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004716 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004717 prec = c - '0';
4718 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004719 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004720 if (!isdigit(c))
4721 break;
4722 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004723 PyErr_SetString(
4724 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004725 "prec too big");
4726 goto error;
4727 }
4728 prec = prec*10 + (c - '0');
4729 }
4730 }
4731 } /* prec */
4732 if (fmtcnt >= 0) {
4733 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004734 if (--fmtcnt >= 0)
4735 c = *fmt++;
4736 }
4737 }
4738 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004739 PyErr_SetString(PyExc_ValueError,
4740 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004741 goto error;
4742 }
4743 if (c != '%') {
4744 v = getnextarg(args, arglen, &argidx);
4745 if (v == NULL)
4746 goto error;
4747 }
4748 sign = 0;
4749 fill = ' ';
4750 switch (c) {
4751 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004752 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004753 len = 1;
4754 break;
4755 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004756#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004757 if (PyUnicode_Check(v)) {
4758 fmt = fmt_start;
4759 argidx = argidx_start;
4760 goto unicode;
4761 }
Georg Brandld45014b2005-10-01 17:06:00 +00004762#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004763 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004764#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004765 if (temp != NULL && PyUnicode_Check(temp)) {
4766 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004767 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004768 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004769 goto unicode;
4770 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004771#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004772 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004773 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004774 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004775 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004776 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004777 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004778 if (!PyString_Check(temp)) {
4779 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004780 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004781 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004782 goto error;
4783 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004784 pbuf = PyString_AS_STRING(temp);
4785 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004786 if (prec >= 0 && len > prec)
4787 len = prec;
4788 break;
4789 case 'i':
4790 case 'd':
4791 case 'u':
4792 case 'o':
4793 case 'x':
4794 case 'X':
4795 if (c == 'i')
4796 c = 'd';
Facundo Batistac11cecf2008-02-24 03:17:21 +00004797 isnumok = 0;
4798 if (PyNumber_Check(v)) {
4799 PyObject *iobj=NULL;
4800
4801 if (PyInt_Check(v) || (PyLong_Check(v))) {
4802 iobj = v;
4803 Py_INCREF(iobj);
4804 }
4805 else {
4806 iobj = PyNumber_Int(v);
4807 if (iobj==NULL) iobj = PyNumber_Long(v);
4808 }
4809 if (iobj!=NULL) {
4810 if (PyInt_Check(iobj)) {
4811 isnumok = 1;
4812 pbuf = formatbuf;
4813 len = formatint(pbuf,
4814 sizeof(formatbuf),
4815 flags, prec, c, iobj);
4816 Py_DECREF(iobj);
4817 if (len < 0)
4818 goto error;
4819 sign = 1;
4820 }
4821 else if (PyLong_Check(iobj)) {
4822 int ilen;
4823
4824 isnumok = 1;
4825 temp = _PyString_FormatLong(iobj, flags,
4826 prec, c, &pbuf, &ilen);
4827 Py_DECREF(iobj);
4828 len = ilen;
4829 if (!temp)
4830 goto error;
4831 sign = 1;
4832 }
4833 else {
4834 Py_DECREF(iobj);
4835 }
4836 }
Guido van Rossum4acdc231997-01-29 06:00:24 +00004837 }
Facundo Batistac11cecf2008-02-24 03:17:21 +00004838 if (!isnumok) {
4839 PyErr_Format(PyExc_TypeError,
4840 "%%%c format: a number is required, "
4841 "not %.200s", c, Py_TYPE(v)->tp_name);
4842 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004843 }
4844 if (flags & F_ZERO)
4845 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004846 break;
4847 case 'e':
4848 case 'E':
4849 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004850 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004851 case 'g':
4852 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004853 if (c == 'F')
4854 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004855 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004856 len = formatfloat(pbuf, sizeof(formatbuf),
4857 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004858 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004859 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004860 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004861 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004862 fill = '0';
4863 break;
4864 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004865#ifdef Py_USING_UNICODE
4866 if (PyUnicode_Check(v)) {
4867 fmt = fmt_start;
4868 argidx = argidx_start;
4869 goto unicode;
4870 }
4871#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004872 pbuf = formatbuf;
4873 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004874 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004875 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004876 break;
4877 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004878 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004879 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004880 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004881 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004882 (Py_ssize_t)(fmt - 1 -
4883 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004884 goto error;
4885 }
4886 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004887 if (*pbuf == '-' || *pbuf == '+') {
4888 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004889 len--;
4890 }
4891 else if (flags & F_SIGN)
4892 sign = '+';
4893 else if (flags & F_BLANK)
4894 sign = ' ';
4895 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004896 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004897 }
4898 if (width < len)
4899 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004900 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004901 reslen -= rescnt;
4902 rescnt = width + fmtcnt + 100;
4903 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004904 if (reslen < 0) {
4905 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004906 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004907 return PyErr_NoMemory();
4908 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004909 if (_PyString_Resize(&result, reslen) < 0) {
4910 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004911 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004912 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004913 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004914 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004915 }
4916 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004917 if (fill != ' ')
4918 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004919 rescnt--;
4920 if (width > len)
4921 width--;
4922 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004923 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4924 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004925 assert(pbuf[1] == c);
4926 if (fill != ' ') {
4927 *res++ = *pbuf++;
4928 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004929 }
Tim Petersfff53252001-04-12 18:38:48 +00004930 rescnt -= 2;
4931 width -= 2;
4932 if (width < 0)
4933 width = 0;
4934 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004935 }
4936 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004937 do {
4938 --rescnt;
4939 *res++ = fill;
4940 } while (--width > len);
4941 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004942 if (fill == ' ') {
4943 if (sign)
4944 *res++ = sign;
4945 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004946 (c == 'x' || c == 'X')) {
4947 assert(pbuf[0] == '0');
4948 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004949 *res++ = *pbuf++;
4950 *res++ = *pbuf++;
4951 }
4952 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004953 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004954 res += len;
4955 rescnt -= len;
4956 while (--width >= len) {
4957 --rescnt;
4958 *res++ = ' ';
4959 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004960 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004961 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004962 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004963 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004964 goto error;
4965 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004966 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004967 } /* '%' */
4968 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004969 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004970 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004971 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004972 goto error;
4973 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004974 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004975 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004976 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004977 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004978 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004979
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004980#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004981 unicode:
4982 if (args_owned) {
4983 Py_DECREF(args);
4984 args_owned = 0;
4985 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004986 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004987 if (PyTuple_Check(orig_args) && argidx > 0) {
4988 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004989 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004990 v = PyTuple_New(n);
4991 if (v == NULL)
4992 goto error;
4993 while (--n >= 0) {
4994 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4995 Py_INCREF(w);
4996 PyTuple_SET_ITEM(v, n, w);
4997 }
4998 args = v;
4999 } else {
5000 Py_INCREF(orig_args);
5001 args = orig_args;
5002 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005003 args_owned = 1;
5004 /* Take what we have of the result and let the Unicode formatting
5005 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00005006 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005007 if (_PyString_Resize(&result, rescnt))
5008 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00005009 fmtcnt = PyString_GET_SIZE(format) - \
5010 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005011 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5012 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00005013 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005014 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00005015 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005016 if (v == NULL)
5017 goto error;
5018 /* Paste what we have (result) to what the Unicode formatting
5019 function returned (v) and return the result (or error) */
5020 w = PyUnicode_Concat(result, v);
5021 Py_DECREF(result);
5022 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005023 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005024 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005025#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005026
Guido van Rossume5372401993-03-16 12:15:04 +00005027 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005028 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005029 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005030 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005031 }
Guido van Rossume5372401993-03-16 12:15:04 +00005032 return NULL;
5033}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005034
Guido van Rossum2a61e741997-01-18 07:55:05 +00005035void
Fred Drakeba096332000-07-09 07:04:36 +00005036PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005037{
5038 register PyStringObject *s = (PyStringObject *)(*p);
5039 PyObject *t;
5040 if (s == NULL || !PyString_Check(s))
5041 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005042 /* If it's a string subclass, we don't really know what putting
5043 it in the interned dict might do. */
5044 if (!PyString_CheckExact(s))
5045 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005046 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005047 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005048 if (interned == NULL) {
5049 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005050 if (interned == NULL) {
5051 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005052 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005053 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005054 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005055 t = PyDict_GetItem(interned, (PyObject *)s);
5056 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005057 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005058 Py_DECREF(*p);
5059 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005060 return;
5061 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005062
Armin Rigo79f7ad22004-08-07 19:27:39 +00005063 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005064 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005065 return;
5066 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005067 /* The two references in interned are not counted by refcnt.
5068 The string deallocator will take care of this */
Christian Heimese93237d2007-12-19 02:37:44 +00005069 Py_REFCNT(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005070 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005071}
5072
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005073void
5074PyString_InternImmortal(PyObject **p)
5075{
5076 PyString_InternInPlace(p);
5077 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5078 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5079 Py_INCREF(*p);
5080 }
5081}
5082
Guido van Rossum2a61e741997-01-18 07:55:05 +00005083
5084PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005085PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005086{
5087 PyObject *s = PyString_FromString(cp);
5088 if (s == NULL)
5089 return NULL;
5090 PyString_InternInPlace(&s);
5091 return s;
5092}
5093
Guido van Rossum8cf04761997-08-02 02:57:45 +00005094void
Fred Drakeba096332000-07-09 07:04:36 +00005095PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005096{
5097 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005098 for (i = 0; i < UCHAR_MAX + 1; i++) {
5099 Py_XDECREF(characters[i]);
5100 characters[i] = NULL;
5101 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005102 Py_XDECREF(nullstring);
5103 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005104}
Barry Warsawa903ad982001-02-23 16:40:48 +00005105
Barry Warsawa903ad982001-02-23 16:40:48 +00005106void _Py_ReleaseInternedStrings(void)
5107{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005108 PyObject *keys;
5109 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005110 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005111 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005112
5113 if (interned == NULL || !PyDict_Check(interned))
5114 return;
5115 keys = PyDict_Keys(interned);
5116 if (keys == NULL || !PyList_Check(keys)) {
5117 PyErr_Clear();
5118 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005119 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005120
5121 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5122 detector, interned strings are not forcibly deallocated; rather, we
5123 give them their stolen references back, and then clear and DECREF
5124 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005125
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005126 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005127 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5128 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005129 for (i = 0; i < n; i++) {
5130 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5131 switch (s->ob_sstate) {
5132 case SSTATE_NOT_INTERNED:
5133 /* XXX Shouldn't happen */
5134 break;
5135 case SSTATE_INTERNED_IMMORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005136 Py_REFCNT(s) += 1;
5137 immortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005138 break;
5139 case SSTATE_INTERNED_MORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005140 Py_REFCNT(s) += 2;
5141 mortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005142 break;
5143 default:
5144 Py_FatalError("Inconsistent interned string state.");
5145 }
5146 s->ob_sstate = SSTATE_NOT_INTERNED;
5147 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005148 fprintf(stderr, "total size of all interned strings: "
5149 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5150 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005151 Py_DECREF(keys);
5152 PyDict_Clear(interned);
5153 Py_DECREF(interned);
5154 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005155}