blob: e1e287fba11a4e185ca62b71d7c292a83d77a8ce [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000619 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000620 c = (c<<3) + *s++ - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000621 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
630 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
651 }
652 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000653 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000666 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 }
668#ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000673 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000674 "Unicode escapes not legal "
675 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#endif
679 default:
680 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 }
685 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000686 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
692}
693
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000694/* -------------------------------------------------------------------- */
695/* object api */
696
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698string_getsize(register PyObject *op)
699{
700 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
705}
706
707static /*const*/ char *
708string_getbuffer(register PyObject *op)
709{
710 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000711 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
715}
716
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000718PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 if (!PyString_Check(op))
721 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723}
724
725/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000726PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728 if (!PyString_Check(op))
729 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731}
732
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733int
734PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737{
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
741 }
742
743 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
749 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000750 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#endif
752 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
755 "%.200s found", obj->ob_type->tp_name);
756 return -1;
757 }
758 }
759
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
767 }
768 return 0;
769}
770
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000772/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000775
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000776#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000777#define STRINGLIB_LEN PyString_GET_SIZE
778#define STRINGLIB_NEW PyString_FromStringAndSize
779#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000780
Fredrik Lundhb9479482006-05-26 17:22:38 +0000781#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Fredrik Lundha50d2012006-05-26 17:04:58 +0000783#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000785#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000786#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000787#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000788
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790static int
Fred Drakeba096332000-07-09 07:04:36 +0000791string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000793 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000796
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000797 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 if (flags & Py_PRINT_RAW) {
Armin Rigo4b63c212006-10-04 11:44:06 +0000809 char *data = op->ob_sval;
810 Py_ssize_t size = op->ob_size;
811 while (size > INT_MAX) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory aligment issues.
815 */
816 const int chunk_size = INT_MAX & ~0x3FFF;
817 fwrite(data, 1, chunk_size, fp);
818 data += chunk_size;
819 size -= chunk_size;
820 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#ifdef __VMS
Armin Rigo4b63c212006-10-04 11:44:06 +0000822 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#else
Armin Rigo4b63c212006-10-04 11:44:06 +0000824 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000825#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000826 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828
Thomas Wouters7e474022000-07-16 12:04:32 +0000829 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000831 if (memchr(op->ob_sval, '\'', op->ob_size) &&
832 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 quote = '"';
834
835 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 for (i = 0; i < op->ob_size; i++) {
837 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000838 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000844 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 fprintf(fp, "\\r");
846 else if (c < ' ' || c >= 0x7f)
847 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000848 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000849 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000852 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853}
854
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000855PyObject *
856PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000859 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyObject *v;
Armin Rigo4b63c212006-10-04 11:44:06 +0000861 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000862 PyErr_SetString(PyExc_OverflowError,
863 "string is too large to make repr");
Guido van Rossume6a6f392007-11-07 01:19:49 +0000864 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000865 }
866 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000868 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 }
870 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000871 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872 register char c;
873 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 int quote;
875
Thomas Wouters7e474022000-07-16 12:04:32 +0000876 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000877 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000878 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000879 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000880 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 quote = '"';
882
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000884 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000885 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000886 /* There's at least enough room for a hex escape
887 and a closing quote. */
888 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000889 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000890 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000892 else if (c == '\t')
893 *p++ = '\\', *p++ = 't';
894 else if (c == '\n')
895 *p++ = '\\', *p++ = 'n';
896 else if (c == '\r')
897 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000898 else if (c < ' ' || c >= 0x7f) {
899 /* For performance, we don't want to call
900 PyOS_snprintf here (extra layers of
901 function call). */
902 sprintf(p, "\\x%02x", c & 0xff);
903 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000904 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000905 else
906 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000908 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000909 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000912 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915}
916
Guido van Rossum189f1df2001-05-01 16:51:53 +0000917static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000918string_repr(PyObject *op)
919{
920 return PyString_Repr(op, 1);
921}
922
923static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000924string_str(PyObject *s)
925{
Tim Petersc9933152001-10-16 20:18:24 +0000926 assert(PyString_Check(s));
927 if (PyString_CheckExact(s)) {
928 Py_INCREF(s);
929 return s;
930 }
931 else {
932 /* Subtype -- return genuine string with the same value. */
933 PyStringObject *t = (PyStringObject *) s;
934 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
935 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000936}
937
Martin v. Löwis18e16552006-02-15 17:27:45 +0000938static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000939string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940{
941 return a->ob_size;
942}
943
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000945string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946{
Andrew Dalke598710c2006-05-25 18:18:39 +0000947 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000948 register PyStringObject *op;
949 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000951 if (PyUnicode_Check(bb))
952 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000953#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000954 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000955 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000956 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 return NULL;
958 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000959#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000960 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000961 if ((a->ob_size == 0 || b->ob_size == 0) &&
962 PyString_CheckExact(a) && PyString_CheckExact(b)) {
963 if (a->ob_size == 0) {
964 Py_INCREF(bb);
965 return bb;
966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967 Py_INCREF(a);
968 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000969 }
970 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000971 if (size < 0) {
972 PyErr_SetString(PyExc_OverflowError,
973 "strings are too large to concat");
974 return NULL;
975 }
976
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000977 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000978 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000979 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000981 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000982 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000983 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000984 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
985 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000986 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988#undef b
989}
990
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000992string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000994 register Py_ssize_t i;
995 register Py_ssize_t j;
996 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000998 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 if (n < 0)
1000 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001001 /* watch out for overflows: the size can overflow int,
1002 * and the # of bytes needed can overflow size_t
1003 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001004 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001005 if (n && size / n != a->ob_size) {
1006 PyErr_SetString(PyExc_OverflowError,
1007 "repeated string is too long");
1008 return NULL;
1009 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001010 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011 Py_INCREF(a);
1012 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001013 }
Tim Peterse7c05322004-06-27 17:24:49 +00001014 nbytes = (size_t)size;
1015 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001016 PyErr_SetString(PyExc_OverflowError,
1017 "repeated string is too long");
1018 return NULL;
1019 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001021 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001022 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001024 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001025 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001026 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001027 op->ob_sval[size] = '\0';
1028 if (a->ob_size == 1 && n > 0) {
1029 memset(op->ob_sval, a->ob_sval[0] , n);
1030 return (PyObject *) op;
1031 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001032 i = 0;
1033 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001034 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001035 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001036 }
1037 while (i < size) {
1038 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001039 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 i += j;
1041 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043}
1044
1045/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1046
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001047static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001048string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001049 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001050 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051{
1052 if (i < 0)
1053 i = 0;
1054 if (j < 0)
1055 j = 0; /* Avoid signed/unsigned bug in next line */
1056 if (j > a->ob_size)
1057 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001058 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1059 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001060 Py_INCREF(a);
1061 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001062 }
1063 if (j < i)
1064 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001065 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001066}
1067
Guido van Rossum9284a572000-03-07 15:53:43 +00001068static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001069string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001070{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001071 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001072#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073 if (PyUnicode_Check(sub_obj))
1074 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001075#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001076 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001077 PyErr_SetString(PyExc_TypeError,
1078 "'in <string>' requires string as left operand");
1079 return -1;
1080 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001081 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001082
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001083 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001084}
1085
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001086static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001087string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001089 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001092 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093 return NULL;
1094 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001095 pchar = a->ob_sval[i];
1096 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001097 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001099 else {
1100#ifdef COUNT_ALLOCS
1101 one_strings++;
1102#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001103 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001104 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001105 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001106}
1107
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108static PyObject*
1109string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112 Py_ssize_t len_a, len_b;
1113 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114 PyObject *result;
1115
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001116 /* Make sure both arguments are strings. */
1117 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001118 result = Py_NotImplemented;
1119 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001120 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001121 if (a == b) {
1122 switch (op) {
1123 case Py_EQ:case Py_LE:case Py_GE:
1124 result = Py_True;
1125 goto out;
1126 case Py_NE:case Py_LT:case Py_GT:
1127 result = Py_False;
1128 goto out;
1129 }
1130 }
1131 if (op == Py_EQ) {
1132 /* Supporting Py_NE here as well does not save
1133 much time, since Py_NE is rarely used. */
1134 if (a->ob_size == b->ob_size
1135 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001136 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001137 a->ob_size) == 0)) {
1138 result = Py_True;
1139 } else {
1140 result = Py_False;
1141 }
1142 goto out;
1143 }
1144 len_a = a->ob_size; len_b = b->ob_size;
1145 min_len = (len_a < len_b) ? len_a : len_b;
1146 if (min_len > 0) {
1147 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1148 if (c==0)
1149 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1150 }else
1151 c = 0;
1152 if (c == 0)
1153 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1154 switch (op) {
1155 case Py_LT: c = c < 0; break;
1156 case Py_LE: c = c <= 0; break;
1157 case Py_EQ: assert(0); break; /* unreachable */
1158 case Py_NE: c = c != 0; break;
1159 case Py_GT: c = c > 0; break;
1160 case Py_GE: c = c >= 0; break;
1161 default:
1162 result = Py_NotImplemented;
1163 goto out;
1164 }
1165 result = c ? Py_True : Py_False;
1166 out:
1167 Py_INCREF(result);
1168 return result;
1169}
1170
1171int
1172_PyString_Eq(PyObject *o1, PyObject *o2)
1173{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001174 PyStringObject *a = (PyStringObject*) o1;
1175 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001176 return a->ob_size == b->ob_size
1177 && *a->ob_sval == *b->ob_sval
1178 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001179}
1180
Guido van Rossum9bfef441993-03-29 10:43:31 +00001181static long
Fred Drakeba096332000-07-09 07:04:36 +00001182string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001183{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001184 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001185 register unsigned char *p;
1186 register long x;
1187
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001188 if (a->ob_shash != -1)
1189 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001190 len = a->ob_size;
1191 p = (unsigned char *) a->ob_sval;
1192 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001193 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001194 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001195 x ^= a->ob_size;
1196 if (x == -1)
1197 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001198 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001199 return x;
1200}
1201
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001202static PyObject*
1203string_subscript(PyStringObject* self, PyObject* item)
1204{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001205 if (PyIndex_Check(item)) {
1206 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001207 if (i == -1 && PyErr_Occurred())
1208 return NULL;
1209 if (i < 0)
1210 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001211 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 }
1213 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001214 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 char* source_buf;
1216 char* result_buf;
1217 PyObject* result;
1218
Tim Petersae1d0c92006-03-17 03:29:34 +00001219 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 PyString_GET_SIZE(self),
1221 &start, &stop, &step, &slicelength) < 0) {
1222 return NULL;
1223 }
1224
1225 if (slicelength <= 0) {
1226 return PyString_FromStringAndSize("", 0);
1227 }
1228 else {
1229 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001230 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001231 if (result_buf == NULL)
1232 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001233
Tim Petersae1d0c92006-03-17 03:29:34 +00001234 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001235 cur += step, i++) {
1236 result_buf[i] = source_buf[cur];
1237 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001238
1239 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001240 slicelength);
1241 PyMem_Free(result_buf);
1242 return result;
1243 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001244 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001245 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001246 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001247 "string indices must be integers");
1248 return NULL;
1249 }
1250}
1251
Martin v. Löwis18e16552006-02-15 17:27:45 +00001252static Py_ssize_t
1253string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254{
1255 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001256 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001257 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258 return -1;
1259 }
1260 *ptr = (void *)self->ob_sval;
1261 return self->ob_size;
1262}
1263
Martin v. Löwis18e16552006-02-15 17:27:45 +00001264static Py_ssize_t
1265string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001266{
Guido van Rossum045e6881997-09-08 18:30:11 +00001267 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001268 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001269 return -1;
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( lenp )
1276 *lenp = self->ob_size;
1277 return 1;
1278}
1279
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280static Py_ssize_t
1281string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001282{
1283 if ( index != 0 ) {
1284 PyErr_SetString(PyExc_SystemError,
1285 "accessing non-existent string segment");
1286 return -1;
1287 }
1288 *ptr = self->ob_sval;
1289 return self->ob_size;
1290}
1291
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001292static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001294 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001295 (ssizeargfunc)string_repeat, /*sq_repeat*/
1296 (ssizeargfunc)string_item, /*sq_item*/
1297 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001298 0, /*sq_ass_item*/
1299 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001300 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001301};
1302
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001303static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001304 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001305 (binaryfunc)string_subscript,
1306 0,
1307};
1308
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001309static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001310 (readbufferproc)string_buffer_getreadbuf,
1311 (writebufferproc)string_buffer_getwritebuf,
1312 (segcountproc)string_buffer_getsegcount,
1313 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001314};
1315
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001316
1317
/* Selectors naming which end(s) of a string a strip operation trims. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above.
 * NOTE(review): these look like argument-parsing format strings of the
 * form "|O:<method name>" -- confirm against the callers. */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* Text of entry i with its three leading characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001326
Andrew Dalke525eab32006-05-26 14:00:45 +00001327
/* True when the `length' bytes of `target' starting at `offset' equal the
 * first `length' bytes of `pattern'.  The first and last bytes are
 * compared directly and memcmp() covers the bytes in between.
 *
 * Don't call if length < 2: the interior length (length - 2) would be
 * negative.  Arguments are evaluated more than once, so they must be
 * free of side effects.  Every parameter is parenthesized so expression
 * arguments such as `buf + k' expand correctly.
 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1333
1334
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit' splits: one more
   than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so that an expression
   argument is evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1347
/* Append the substring data[left:right] to the enclosing function's
 * result list.
 *
 * Expands in the caller's scope and relies on its `str' and `list'
 * variables and its `onError' label: control jumps to onError when the
 * substring cannot be created or appended.  The local reference to the
 * substring is dropped in either case.  The expansion is a plain
 * statement sequence, so it must not be used as the unbraced body of an
 * if/else or loop.
 */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (!str)                                                   \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
1359
/* Add the substring data[left:right] as the next element of the
 * enclosing function's result list.
 *
 * Expands in the caller's scope and relies on its `str', `list' and
 * `count' variables and its `onError' label.  While `count' is below
 * MAX_PREALLOC the new string is stored directly into slot `count' with
 * PyList_SET_ITEM; after that it is appended with PyList_Append and the
 * local reference is dropped.  `count' is incremented on success; on any
 * failure control jumps to onError.
 */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (!str)                                                   \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str) != 0) {                    \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001376
/* Always force the list to the expected size: overwrite the list's
   ob_size with the caller's `count' variable.  The argument is
   parenthesized so the cast applies to the whole expression. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
Andrew Dalke525eab32006-05-26 14:00:45 +00001379
/* Cursor helpers for whitespace scanning.  `i' must be a modifiable
 * index variable; it is advanced (SKIP_*) or moved backwards (RSKIP_*)
 * in place.  The forward forms stop at `len', the reverse forms stop
 * once `i' drops below 0.  Parameters are parenthesized so expression
 * arguments such as `buf + k' expand correctly.
 */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1384
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001385Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001386split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387{
Andrew Dalke525eab32006-05-26 14:00:45 +00001388 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001389 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001390 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391
1392 if (list == NULL)
1393 return NULL;
1394
Andrew Dalke02758d62006-05-26 15:21:01 +00001395 i = j = 0;
1396
1397 while (maxsplit-- > 0) {
1398 SKIP_SPACE(s, i, len);
1399 if (i==len) break;
1400 j = i; i++;
1401 SKIP_NONSPACE(s, i, len);
1402 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001404
1405 if (i < len) {
1406 /* Only occurs when maxsplit was reached */
1407 /* Skip any remaining whitespace and copy to end of string */
1408 SKIP_SPACE(s, i, len);
1409 if (i != len)
1410 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001412 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 Py_DECREF(list);
1416 return NULL;
1417}
1418
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001419Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001420split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421{
Andrew Dalke525eab32006-05-26 14:00:45 +00001422 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001424 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425
1426 if (list == NULL)
1427 return NULL;
1428
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001429 i = j = 0;
1430 while ((j < len) && (maxcount-- > 0)) {
1431 for(; j<len; j++) {
1432 /* I found that using memchr makes no difference */
1433 if (s[j] == ch) {
1434 SPLIT_ADD(s, i, j);
1435 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001437 }
1438 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001439 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001440 if (i <= len) {
1441 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001443 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444 return list;
1445
1446 onError:
1447 Py_DECREF(list);
1448 return NULL;
1449}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001451PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452"S.split([sep [,maxsplit]]) -> list of strings\n\
1453\n\
1454Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001456splits are done. If sep is not specified or is None, any\n\
1457whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458
1459static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001460string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001462 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001463 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001465 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001466#ifdef USE_FAST
1467 Py_ssize_t pos;
1468#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469
Martin v. Löwis9c830762006-04-13 08:37:17 +00001470 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001473 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001476 if (PyString_Check(subobj)) {
1477 sub = PyString_AS_STRING(subobj);
1478 n = PyString_GET_SIZE(subobj);
1479 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001480#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 else if (PyUnicode_Check(subobj))
1482 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001483#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001484 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1485 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487 if (n == 0) {
1488 PyErr_SetString(PyExc_ValueError, "empty separator");
1489 return NULL;
1490 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001491 else if (n == 1)
1492 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493
Andrew Dalke525eab32006-05-26 14:00:45 +00001494 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 if (list == NULL)
1496 return NULL;
1497
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001498#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001500 while (maxsplit-- > 0) {
1501 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1502 if (pos < 0)
1503 break;
1504 j = i+pos;
1505 SPLIT_ADD(s, i, j);
1506 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001508#else
1509 i = j = 0;
1510 while ((j+n <= len) && (maxsplit-- > 0)) {
1511 for (; j+n <= len; j++) {
1512 if (Py_STRING_MATCH(s, j, sub, n)) {
1513 SPLIT_ADD(s, i, j);
1514 i = j = j + n;
1515 break;
1516 }
1517 }
1518 }
1519#endif
1520 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001521 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return list;
1523
Andrew Dalke525eab32006-05-26 14:00:45 +00001524 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525 Py_DECREF(list);
1526 return NULL;
1527}
1528
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001529PyDoc_STRVAR(partition__doc__,
1530"S.partition(sep) -> (head, sep, tail)\n\
1531\n\
1532Searches for the separator sep in S, and returns the part before it,\n\
1533the separator itself, and the part after it. If the separator is not\n\
1534found, returns S and two empty strings.");
1535
1536static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001537string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001538{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001539 const char *sep;
1540 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001541
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001542 if (PyString_Check(sep_obj)) {
1543 sep = PyString_AS_STRING(sep_obj);
1544 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001545 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001546#ifdef Py_USING_UNICODE
1547 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001548 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001550 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001551 return NULL;
1552
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001553 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001554 (PyObject*) self,
1555 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1556 sep_obj, sep, sep_len
1557 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558}
1559
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001560PyDoc_STRVAR(rpartition__doc__,
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001561"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001562\n\
1563Searches for the separator sep in S, starting at the end of S, and returns\n\
1564the part before it, the separator itself, and the part after it. If the\n\
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001565separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001566
1567static PyObject *
1568string_rpartition(PyStringObject *self, PyObject *sep_obj)
1569{
1570 const char *sep;
1571 Py_ssize_t sep_len;
1572
1573 if (PyString_Check(sep_obj)) {
1574 sep = PyString_AS_STRING(sep_obj);
1575 sep_len = PyString_GET_SIZE(sep_obj);
1576 }
1577#ifdef Py_USING_UNICODE
1578 else if (PyUnicode_Check(sep_obj))
1579 return PyUnicode_Partition((PyObject *) self, sep_obj);
1580#endif
1581 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1582 return NULL;
1583
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001584 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001585 (PyObject*) self,
1586 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587 sep_obj, sep, sep_len
1588 );
1589}
1590
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001591Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001592rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001593{
Andrew Dalke525eab32006-05-26 14:00:45 +00001594 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001595 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001596 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597
1598 if (list == NULL)
1599 return NULL;
1600
Andrew Dalke02758d62006-05-26 15:21:01 +00001601 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001602
Andrew Dalke02758d62006-05-26 15:21:01 +00001603 while (maxsplit-- > 0) {
1604 RSKIP_SPACE(s, i);
1605 if (i<0) break;
1606 j = i; i--;
1607 RSKIP_NONSPACE(s, i);
1608 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001609 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001610 if (i >= 0) {
1611 /* Only occurs when maxsplit was reached */
1612 /* Skip any remaining whitespace and copy to beginning of string */
1613 RSKIP_SPACE(s, i);
1614 if (i >= 0)
1615 SPLIT_ADD(s, 0, i + 1);
1616
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001618 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001619 if (PyList_Reverse(list) < 0)
1620 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001622 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001623 Py_DECREF(list);
1624 return NULL;
1625}
1626
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001627Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001628rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629{
Andrew Dalke525eab32006-05-26 14:00:45 +00001630 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001632 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633
1634 if (list == NULL)
1635 return NULL;
1636
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001637 i = j = len - 1;
1638 while ((i >= 0) && (maxcount-- > 0)) {
1639 for (; i >= 0; i--) {
1640 if (s[i] == ch) {
1641 SPLIT_ADD(s, i + 1, j + 1);
1642 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001644 }
1645 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001646 }
1647 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001648 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001649 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001650 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001651 if (PyList_Reverse(list) < 0)
1652 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653 return list;
1654
1655 onError:
1656 Py_DECREF(list);
1657 return NULL;
1658}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659
1660PyDoc_STRVAR(rsplit__doc__,
1661"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1662\n\
1663Return a list of the words in the string S, using sep as the\n\
1664delimiter string, starting at the end of the string and working\n\
1665to the front. If maxsplit is given, at most maxsplit splits are\n\
1666done. If sep is not specified or is None, any whitespace string\n\
1667is a separator.");
1668
1669static PyObject *
1670string_rsplit(PyStringObject *self, PyObject *args)
1671{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001673 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001675 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676
Martin v. Löwis9c830762006-04-13 08:37:17 +00001677 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 return NULL;
1679 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001680 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681 if (subobj == Py_None)
1682 return rsplit_whitespace(s, len, maxsplit);
1683 if (PyString_Check(subobj)) {
1684 sub = PyString_AS_STRING(subobj);
1685 n = PyString_GET_SIZE(subobj);
1686 }
1687#ifdef Py_USING_UNICODE
1688 else if (PyUnicode_Check(subobj))
1689 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1690#endif
1691 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1692 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001693
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 if (n == 0) {
1695 PyErr_SetString(PyExc_ValueError, "empty separator");
1696 return NULL;
1697 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001698 else if (n == 1)
1699 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700
Andrew Dalke525eab32006-05-26 14:00:45 +00001701 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702 if (list == NULL)
1703 return NULL;
1704
1705 j = len;
1706 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001707
1708 while ( (i >= 0) && (maxsplit-- > 0) ) {
1709 for (; i>=0; i--) {
1710 if (Py_STRING_MATCH(s, i, sub, n)) {
1711 SPLIT_ADD(s, i + n, j);
1712 j = i;
1713 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001715 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001716 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001717 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001718 SPLIT_ADD(s, 0, j);
1719 FIX_PREALLOC_SIZE(list);
1720 if (PyList_Reverse(list) < 0)
1721 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 return list;
1723
Andrew Dalke525eab32006-05-26 14:00:45 +00001724onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001725 Py_DECREF(list);
1726 return NULL;
1727}
1728
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001730PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731"S.join(sequence) -> string\n\
1732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001734sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735
1736static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001737string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738{
1739 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001740 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001745 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001746 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 seq = PySequence_Fast(orig, "");
1749 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001750 return NULL;
1751 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001752
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001753 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001754 if (seqlen == 0) {
1755 Py_DECREF(seq);
1756 return PyString_FromString("");
1757 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001759 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001760 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1761 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001763 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001768 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001769 * Do a pre-pass to figure out the total amount of space we'll
1770 * need (sz), see whether any argument is absurd, and defer to
1771 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775 item = PySequence_Fast_GET_ITEM(seq, i);
1776 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001777#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001779 /* Defer to Unicode join.
1780 * CAUTION: There's no gurantee that the
1781 * original sequence can be iterated over
1782 * again, so we must pass seq here.
1783 */
1784 PyObject *result;
1785 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001786 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001787 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001789#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001791 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001792 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001793 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 Py_DECREF(seq);
1795 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001796 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001797 sz += PyString_GET_SIZE(item);
1798 if (i != 0)
1799 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001800 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001802 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001803 Py_DECREF(seq);
1804 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 }
1807
1808 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001809 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 if (res == NULL) {
1811 Py_DECREF(seq);
1812 return NULL;
1813 }
1814
1815 /* Catenate everything. */
1816 p = PyString_AS_STRING(res);
1817 for (i = 0; i < seqlen; ++i) {
1818 size_t n;
1819 item = PySequence_Fast_GET_ITEM(seq, i);
1820 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001821 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 p += n;
1823 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001824 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001825 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001828
Jeremy Hylton49048292000-07-11 03:28:17 +00001829 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831}
1832
Tim Peters52e155e2001-06-16 05:42:57 +00001833PyObject *
1834_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001835{
Tim Petersa7259592001-06-16 05:11:17 +00001836 assert(sep != NULL && PyString_Check(sep));
1837 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001839}
1840
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001841Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001843{
1844 if (*end > len)
1845 *end = len;
1846 else if (*end < 0)
1847 *end += len;
1848 if (*end < 0)
1849 *end = 0;
1850 if (*start < 0)
1851 *start += len;
1852 if (*start < 0)
1853 *start = 0;
1854}
1855
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001856Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001857string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001860 const char *sub;
1861 Py_ssize_t sub_len;
1862 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001864 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1865 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 return -2;
1867 if (PyString_Check(subobj)) {
1868 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001869 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001871#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001872 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001873 return PyUnicode_Find(
1874 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001875#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001876 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001877 /* XXX - the "expected a character buffer object" is pretty
1878 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 return -2;
1880
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001881 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001882 return stringlib_find_slice(
1883 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1884 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001885 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001886 return stringlib_rfind_slice(
1887 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1888 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889}
1890
1891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001892PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893"S.find(sub [,start [,end]]) -> int\n\
1894\n\
1895Return the lowest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001896such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897arguments start and end are interpreted as in slice notation.\n\
1898\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900
1901static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001902string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001904 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905 if (result == -2)
1906 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908}
1909
1910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912"S.index(sub [,start [,end]]) -> int\n\
1913\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001914Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915
1916static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001917string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001919 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920 if (result == -2)
1921 return NULL;
1922 if (result == -1) {
1923 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001924 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 return NULL;
1926 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928}
1929
1930
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001931PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932"S.rfind(sub [,start [,end]]) -> int\n\
1933\n\
1934Return the highest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001935such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936arguments start and end are interpreted as in slice notation.\n\
1937\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001938Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939
1940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001941string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001943 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 if (result == -2)
1945 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947}
1948
1949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951"S.rindex(sub [,start [,end]]) -> int\n\
1952\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001953Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954
1955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001956string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001958 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 if (result == -2)
1960 return NULL;
1961 if (result == -1) {
1962 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001963 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 return NULL;
1965 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967}
1968
1969
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001970Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1972{
1973 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001976 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1977 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001978
1979 i = 0;
1980 if (striptype != RIGHTSTRIP) {
1981 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1982 i++;
1983 }
1984 }
1985
1986 j = len;
1987 if (striptype != LEFTSTRIP) {
1988 do {
1989 j--;
1990 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1991 j++;
1992 }
1993
1994 if (i == 0 && j == len && PyString_CheckExact(self)) {
1995 Py_INCREF(self);
1996 return (PyObject*)self;
1997 }
1998 else
1999 return PyString_FromStringAndSize(s+i, j-i);
2000}
2001
2002
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002003Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002004do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005{
2006 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009 i = 0;
2010 if (striptype != RIGHTSTRIP) {
2011 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2012 i++;
2013 }
2014 }
2015
2016 j = len;
2017 if (striptype != LEFTSTRIP) {
2018 do {
2019 j--;
2020 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2021 j++;
2022 }
2023
Tim Peters8fa5dd02001-09-12 02:18:30 +00002024 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025 Py_INCREF(self);
2026 return (PyObject*)self;
2027 }
2028 else
2029 return PyString_FromStringAndSize(s+i, j-i);
2030}
2031
2032
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002033Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002034do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2035{
2036 PyObject *sep = NULL;
2037
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002038 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002039 return NULL;
2040
2041 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002042 if (PyString_Check(sep))
2043 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002044#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002045 else if (PyUnicode_Check(sep)) {
2046 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2047 PyObject *res;
2048 if (uniself==NULL)
2049 return NULL;
2050 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2051 striptype, sep);
2052 Py_DECREF(uniself);
2053 return res;
2054 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002056 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002058 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002060 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002061#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002062 STRIPNAME(striptype));
2063 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002064 }
2065
2066 return do_strip(self, striptype);
2067}
2068
2069
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002070PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002071"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072\n\
2073Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002074whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002075If chars is given and not None, remove characters in chars instead.\n\
2076If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077
2078static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081 if (PyTuple_GET_SIZE(args) == 0)
2082 return do_strip(self, BOTHSTRIP); /* Common case */
2083 else
2084 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085}
2086
2087
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002088PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002089"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002091Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002092If chars is given and not None, remove characters in chars instead.\n\
2093If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094
2095static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098 if (PyTuple_GET_SIZE(args) == 0)
2099 return do_strip(self, LEFTSTRIP); /* Common case */
2100 else
2101 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102}
2103
2104
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002105PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002106"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002108Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002109If chars is given and not None, remove characters in chars instead.\n\
2110If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111
2112static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115 if (PyTuple_GET_SIZE(args) == 0)
2116 return do_strip(self, RIGHTSTRIP); /* Common case */
2117 else
2118 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119}
2120
2121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002122PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123"S.lower() -> string\n\
2124\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002125Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002127/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2128#ifndef _tolower
2129#define _tolower tolower
2130#endif
2131
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002133string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002135 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002136 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002137 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002139 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002140 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002142
2143 s = PyString_AS_STRING(newobj);
2144
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002145 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002146
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002148 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002149 if (isupper(c))
2150 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002152
Anthony Baxtera6286212006-04-11 07:42:36 +00002153 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154}
2155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002156PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157"S.upper() -> string\n\
2158\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002159Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002161#ifndef _toupper
2162#define _toupper toupper
2163#endif
2164
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002166string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002168 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002169 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002170 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002172 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002175
2176 s = PyString_AS_STRING(newobj);
2177
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002178 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002179
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002181 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002182 if (islower(c))
2183 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002185
Anthony Baxtera6286212006-04-11 07:42:36 +00002186 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187}
2188
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002189PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190"S.title() -> string\n\
2191\n\
2192Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002193characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194
2195static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002196string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197{
2198 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002199 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002201 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202
Anthony Baxtera6286212006-04-11 07:42:36 +00002203 newobj = PyString_FromStringAndSize(NULL, n);
2204 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002206 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207 for (i = 0; i < n; i++) {
2208 int c = Py_CHARMASK(*s++);
2209 if (islower(c)) {
2210 if (!previous_is_cased)
2211 c = toupper(c);
2212 previous_is_cased = 1;
2213 } else if (isupper(c)) {
2214 if (previous_is_cased)
2215 c = tolower(c);
2216 previous_is_cased = 1;
2217 } else
2218 previous_is_cased = 0;
2219 *s_new++ = c;
2220 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002221 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222}
2223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002224PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225"S.capitalize() -> string\n\
2226\n\
2227Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229
2230static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002231string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232{
2233 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002234 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002235 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
Anthony Baxtera6286212006-04-11 07:42:36 +00002237 newobj = PyString_FromStringAndSize(NULL, n);
2238 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002240 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241 if (0 < n) {
2242 int c = Py_CHARMASK(*s++);
2243 if (islower(c))
2244 *s_new = toupper(c);
2245 else
2246 *s_new = c;
2247 s_new++;
2248 }
2249 for (i = 1; i < n; i++) {
2250 int c = Py_CHARMASK(*s++);
2251 if (isupper(c))
2252 *s_new = tolower(c);
2253 else
2254 *s_new = c;
2255 s_new++;
2256 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002257 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258}
2259
2260
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002261PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262"S.count(sub[, start[, end]]) -> int\n\
2263\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002264Return the number of non-overlapping occurrences of substring sub in\n\
2265string S[start:end]. Optional arguments start and end are interpreted\n\
2266as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
2268static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002269string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002271 PyObject *sub_obj;
2272 const char *str = PyString_AS_STRING(self), *sub;
2273 Py_ssize_t sub_len;
2274 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002276 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2277 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002279
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002280 if (PyString_Check(sub_obj)) {
2281 sub = PyString_AS_STRING(sub_obj);
2282 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002284#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002286 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002287 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002288 if (count == -1)
2289 return NULL;
2290 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002291 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002292 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002293#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002294 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 return NULL;
2296
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002297 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002298
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002299 return PyInt_FromSsize_t(
2300 stringlib_count(str + start, end - start, sub, sub_len)
2301 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302}
2303
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002304PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305"S.swapcase() -> string\n\
2306\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002308converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309
2310static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002311string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312{
2313 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002314 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002315 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
Anthony Baxtera6286212006-04-11 07:42:36 +00002317 newobj = PyString_FromStringAndSize(NULL, n);
2318 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002320 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 for (i = 0; i < n; i++) {
2322 int c = Py_CHARMASK(*s++);
2323 if (islower(c)) {
2324 *s_new = toupper(c);
2325 }
2326 else if (isupper(c)) {
2327 *s_new = tolower(c);
2328 }
2329 else
2330 *s_new = c;
2331 s_new++;
2332 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002333 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334}
2335
2336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002337PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338"S.translate(table [,deletechars]) -> string\n\
2339\n\
2340Return a copy of the string S, where all characters occurring\n\
2341in the optional argument deletechars are removed, and the\n\
2342remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002343translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344
2345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002346string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348 register char *input, *output;
2349 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002350 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002353 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354 PyObject *result;
2355 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002358 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361
2362 if (PyString_Check(tableobj)) {
2363 table1 = PyString_AS_STRING(tableobj);
2364 tablen = PyString_GET_SIZE(tableobj);
2365 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002366#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002368 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 parameter; instead a mapping to None will cause characters
2370 to be deleted. */
2371 if (delobj != NULL) {
2372 PyErr_SetString(PyExc_TypeError,
2373 "deletions are implemented differently for unicode");
2374 return NULL;
2375 }
2376 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2377 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002378#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381
Martin v. Löwis00b61272002-12-12 20:03:19 +00002382 if (tablen != 256) {
2383 PyErr_SetString(PyExc_ValueError,
2384 "translation table must be 256 characters long");
2385 return NULL;
2386 }
2387
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 if (delobj != NULL) {
2389 if (PyString_Check(delobj)) {
2390 del_table = PyString_AS_STRING(delobj);
2391 dellen = PyString_GET_SIZE(delobj);
2392 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002393#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002394 else if (PyUnicode_Check(delobj)) {
2395 PyErr_SetString(PyExc_TypeError,
2396 "deletions are implemented differently for unicode");
2397 return NULL;
2398 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002399#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2401 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 }
2403 else {
2404 del_table = NULL;
2405 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406 }
2407
2408 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002409 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 result = PyString_FromStringAndSize((char *)NULL, inlen);
2411 if (result == NULL)
2412 return NULL;
2413 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002414 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415
2416 if (dellen == 0) {
2417 /* If no deletions are required, use faster code */
2418 for (i = inlen; --i >= 0; ) {
2419 c = Py_CHARMASK(*input++);
2420 if (Py_CHARMASK((*output++ = table[c])) != c)
2421 changed = 1;
2422 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002423 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424 return result;
2425 Py_DECREF(result);
2426 Py_INCREF(input_obj);
2427 return input_obj;
2428 }
2429
2430 for (i = 0; i < 256; i++)
2431 trans_table[i] = Py_CHARMASK(table[i]);
2432
2433 for (i = 0; i < dellen; i++)
2434 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2435
2436 for (i = inlen; --i >= 0; ) {
2437 c = Py_CHARMASK(*input++);
2438 if (trans_table[c] != -1)
2439 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2440 continue;
2441 changed = 1;
2442 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002443 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 Py_DECREF(result);
2445 Py_INCREF(input_obj);
2446 return input_obj;
2447 }
2448 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002449 if (inlen > 0)
2450 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002451 return result;
2452}
2453
2454
/* Direction flags passed to findstring().  REVERSE is parenthesized so
   the negative constant cannot bind to neighbouring tokens at the point
   of expansion. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Evaluates to a char* pointing at the first byte equal to c within the
   first target_len bytes of target, or NULL when there is none. */
#define findchar(target, target_len, c)				\
  ((char *)memchr((const void *)(target), (c), (target_len)))
2462
2463/* String ops must return a string. */
2464/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002465Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002466return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002467{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002468 if (PyString_CheckExact(self)) {
2469 Py_INCREF(self);
2470 return self;
2471 }
2472 return (PyStringObject *)PyString_FromStringAndSize(
2473 PyString_AS_STRING(self),
2474 PyString_GET_SIZE(self));
2475}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002477Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002478countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002479{
2480 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002481 const char *start=target;
2482 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002484 while ( (start=findchar(start, end-start, c)) != NULL ) {
2485 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002486 if (count >= maxcount)
2487 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002488 start += 1;
2489 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002490 return count;
2491}
2492
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002493Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002494findstring(const char *target, Py_ssize_t target_len,
2495 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002496 Py_ssize_t start,
2497 Py_ssize_t end,
2498 int direction)
2499{
2500 if (start < 0) {
2501 start += target_len;
2502 if (start < 0)
2503 start = 0;
2504 }
2505 if (end > target_len) {
2506 end = target_len;
2507 } else if (end < 0) {
2508 end += target_len;
2509 if (end < 0)
2510 end = 0;
2511 }
2512
2513 /* zero-length substrings always match at the first attempt */
2514 if (pattern_len == 0)
2515 return (direction > 0) ? start : end;
2516
2517 end -= pattern_len;
2518
2519 if (direction < 0) {
2520 for (; end >= start; end--)
2521 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2522 return end;
2523 } else {
2524 for (; start <= end; start++)
2525 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2526 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002527 }
2528 return -1;
2529}
2530
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002531Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002532countstring(const char *target, Py_ssize_t target_len,
2533 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002534 Py_ssize_t start,
2535 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002536 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002538 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002540 if (start < 0) {
2541 start += target_len;
2542 if (start < 0)
2543 start = 0;
2544 }
2545 if (end > target_len) {
2546 end = target_len;
2547 } else if (end < 0) {
2548 end += target_len;
2549 if (end < 0)
2550 end = 0;
2551 }
2552
2553 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002554 if (pattern_len == 0 || maxcount == 0) {
2555 if (target_len+1 < maxcount)
2556 return target_len+1;
2557 return maxcount;
2558 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559
2560 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002561 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002562 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2564 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002565 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002566 end -= pattern_len-1;
2567 }
2568 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002569 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002570 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2571 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002572 if (--maxcount <= 0)
2573 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002574 start += pattern_len-1;
2575 }
2576 }
2577 return count;
2578}
2579
2580
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002581/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002582
2583/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002584Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002585replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002586 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002587 Py_ssize_t maxcount)
2588{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002589 char *self_s, *result_s;
2590 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 Py_ssize_t count, i, product;
2592 PyStringObject *result;
2593
2594 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002595
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002596 /* 1 at the end plus 1 after every character */
2597 count = self_len+1;
2598 if (maxcount < count)
2599 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002600
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002601 /* Check for overflow */
2602 /* result_len = count * to_len + self_len; */
2603 product = count * to_len;
2604 if (product / to_len != count) {
2605 PyErr_SetString(PyExc_OverflowError,
2606 "replace string is too long");
2607 return NULL;
2608 }
2609 result_len = product + self_len;
2610 if (result_len < 0) {
2611 PyErr_SetString(PyExc_OverflowError,
2612 "replace string is too long");
2613 return NULL;
2614 }
2615
2616 if (! (result = (PyStringObject *)
2617 PyString_FromStringAndSize(NULL, result_len)) )
2618 return NULL;
2619
2620 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002621 result_s = PyString_AS_STRING(result);
2622
2623 /* TODO: special case single character, which doesn't need memcpy */
2624
2625 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002626 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002627 result_s += to_len;
2628 count -= 1;
2629
2630 for (i=0; i<count; i++) {
2631 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002632 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002633 result_s += to_len;
2634 }
2635
2636 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002637 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002638
2639 return result;
2640}
2641
2642/* Special case for deleting a single character */
2643/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002644Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002645replace_delete_single_character(PyStringObject *self,
2646 char from_c, Py_ssize_t maxcount)
2647{
2648 char *self_s, *result_s;
2649 char *start, *next, *end;
2650 Py_ssize_t self_len, result_len;
2651 Py_ssize_t count;
2652 PyStringObject *result;
2653
2654 self_len = PyString_GET_SIZE(self);
2655 self_s = PyString_AS_STRING(self);
2656
Andrew Dalke51324072006-05-26 20:25:22 +00002657 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002658 if (count == 0) {
2659 return return_self(self);
2660 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002661
2662 result_len = self_len - count; /* from_len == 1 */
2663 assert(result_len>=0);
2664
2665 if ( (result = (PyStringObject *)
2666 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2667 return NULL;
2668 result_s = PyString_AS_STRING(result);
2669
2670 start = self_s;
2671 end = self_s + self_len;
2672 while (count-- > 0) {
2673 next = findchar(start, end-start, from_c);
2674 if (next == NULL)
2675 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002676 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002677 result_s += (next-start);
2678 start = next+1;
2679 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002680 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002681
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002682 return result;
2683}
2684
2685/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2686
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002687Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002688replace_delete_substring(PyStringObject *self,
2689 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002690 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002691 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002692 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002693 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002694 Py_ssize_t count, offset;
2695 PyStringObject *result;
2696
2697 self_len = PyString_GET_SIZE(self);
2698 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002699
2700 count = countstring(self_s, self_len,
2701 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002702 0, self_len, 1,
2703 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002704
2705 if (count == 0) {
2706 /* no matches */
2707 return return_self(self);
2708 }
2709
2710 result_len = self_len - (count * from_len);
2711 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002712
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002713 if ( (result = (PyStringObject *)
2714 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2715 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002716
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002718
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002719 start = self_s;
2720 end = self_s + self_len;
2721 while (count-- > 0) {
2722 offset = findstring(start, end-start,
2723 from_s, from_len,
2724 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002725 if (offset == -1)
2726 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002727 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002728
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002729 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002730
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731 result_s += (next-start);
2732 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002733 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002734 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002735 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002736}
2737
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002738/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002739Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002740replace_single_character_in_place(PyStringObject *self,
2741 char from_c, char to_c,
2742 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002743{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744 char *self_s, *result_s, *start, *end, *next;
2745 Py_ssize_t self_len;
2746 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002747
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002748 /* The result string will be the same size */
2749 self_s = PyString_AS_STRING(self);
2750 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002751
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002753
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 if (next == NULL) {
2755 /* No matches; return the original string */
2756 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002757 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002758
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002759 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002760 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002761 if (result == NULL)
2762 return NULL;
2763 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002764 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002765
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002766 /* change everything in-place, starting with this one */
2767 start = result_s + (next-self_s);
2768 *start = to_c;
2769 start++;
2770 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002771
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002772 while (--maxcount > 0) {
2773 next = findchar(start, end-start, from_c);
2774 if (next == NULL)
2775 break;
2776 *next = to_c;
2777 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002778 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002779
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002780 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002781}
2782
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002784Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002786 const char *from_s, Py_ssize_t from_len,
2787 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 Py_ssize_t maxcount)
2789{
2790 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002791 char *self_s;
2792 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002794
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002795 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002796
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 self_s = PyString_AS_STRING(self);
2798 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002799
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 offset = findstring(self_s, self_len,
2801 from_s, from_len,
2802 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002803 if (offset == -1) {
2804 /* No matches; return the original string */
2805 return return_self(self);
2806 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002807
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002809 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810 if (result == NULL)
2811 return NULL;
2812 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002813 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002814
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815 /* change everything in-place, starting with this one */
2816 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002817 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002818 start += from_len;
2819 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002820
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821 while ( --maxcount > 0) {
2822 offset = findstring(start, end-start,
2823 from_s, from_len,
2824 0, end-start, FORWARD);
2825 if (offset==-1)
2826 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002827 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002828 start += offset+from_len;
2829 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002830
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002831 return result;
2832}
2833
2834/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002835Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836replace_single_character(PyStringObject *self,
2837 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002838 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839 Py_ssize_t maxcount)
2840{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002841 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002843 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002844 Py_ssize_t count, product;
2845 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002846
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847 self_s = PyString_AS_STRING(self);
2848 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002849
Andrew Dalke51324072006-05-26 20:25:22 +00002850 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002851 if (count == 0) {
2852 /* no matches, return unchanged */
2853 return return_self(self);
2854 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002855
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002856 /* use the difference between current and new, hence the "-1" */
2857 /* result_len = self_len + count * (to_len-1) */
2858 product = count * (to_len-1);
2859 if (product / (to_len-1) != count) {
2860 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2861 return NULL;
2862 }
2863 result_len = self_len + product;
2864 if (result_len < 0) {
2865 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2866 return NULL;
2867 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002868
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869 if ( (result = (PyStringObject *)
2870 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2871 return NULL;
2872 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002873
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 start = self_s;
2875 end = self_s + self_len;
2876 while (count-- > 0) {
2877 next = findchar(start, end-start, from_c);
2878 if (next == NULL)
2879 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002880
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881 if (next == start) {
2882 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002883 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002884 result_s += to_len;
2885 start += 1;
2886 } else {
2887 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002888 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002889 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002890 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002891 result_s += to_len;
2892 start = next+1;
2893 }
2894 }
2895 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002896 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002897
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 return result;
2899}
2900
2901/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002902Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002904 const char *from_s, Py_ssize_t from_len,
2905 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002907 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002909 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002910 Py_ssize_t count, offset, product;
2911 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002912
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002913 self_s = PyString_AS_STRING(self);
2914 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002915
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916 count = countstring(self_s, self_len,
2917 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002918 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919 if (count == 0) {
2920 /* no matches, return unchanged */
2921 return return_self(self);
2922 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002923
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002924 /* Check for overflow */
2925 /* result_len = self_len + count * (to_len-from_len) */
2926 product = count * (to_len-from_len);
2927 if (product / (to_len-from_len) != count) {
2928 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2929 return NULL;
2930 }
2931 result_len = self_len + product;
2932 if (result_len < 0) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002936
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002937 if ( (result = (PyStringObject *)
2938 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2939 return NULL;
2940 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002941
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002942 start = self_s;
2943 end = self_s + self_len;
2944 while (count-- > 0) {
2945 offset = findstring(start, end-start,
2946 from_s, from_len,
2947 0, end-start, FORWARD);
2948 if (offset == -1)
2949 break;
2950 next = start+offset;
2951 if (next == start) {
2952 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002953 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002954 result_s += to_len;
2955 start += from_len;
2956 } else {
2957 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002958 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002960 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002961 result_s += to_len;
2962 start = next+from_len;
2963 }
2964 }
2965 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002966 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002967
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002968 return result;
2969}
2970
2971
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002972Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002974 const char *from_s, Py_ssize_t from_len,
2975 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002976 Py_ssize_t maxcount)
2977{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978 if (maxcount < 0) {
2979 maxcount = PY_SSIZE_T_MAX;
2980 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2981 /* nothing to do; return the original string */
2982 return return_self(self);
2983 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002984
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002985 if (maxcount == 0 ||
2986 (from_len == 0 && to_len == 0)) {
2987 /* nothing to do; return the original string */
2988 return return_self(self);
2989 }
2990
2991 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002992
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002993 if (from_len == 0) {
2994 /* insert the 'to' string everywhere. */
2995 /* >>> "Python".replace("", ".") */
2996 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002997 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002998 }
2999
3000 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3001 /* point for an empty self string to generate a non-empty string */
3002 /* Special case so the remaining code always gets a non-empty string */
3003 if (PyString_GET_SIZE(self) == 0) {
3004 return return_self(self);
3005 }
3006
3007 if (to_len == 0) {
3008 /* delete all occurances of 'from' string */
3009 if (from_len == 1) {
3010 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003011 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003013 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003014 }
3015 }
3016
3017 /* Handle special case where both strings have the same length */
3018
3019 if (from_len == to_len) {
3020 if (from_len == 1) {
3021 return replace_single_character_in_place(
3022 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003023 from_s[0],
3024 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003025 maxcount);
3026 } else {
3027 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003028 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003029 }
3030 }
3031
3032 /* Otherwise use the more generic algorithms */
3033 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003034 return replace_single_character(self, from_s[0],
3035 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003036 } else {
3037 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003038 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003039 }
3040}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003041
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003042PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003043"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044\n\
3045Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003046old replaced by new. If the optional argument count is\n\
3047given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048
3049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003050string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003051{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003052 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003054 const char *from_s, *to_s;
3055 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003058 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003060 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003061 from_s = PyString_AS_STRING(from);
3062 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003064#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003065 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003066 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003067 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003068#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003069 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070 return NULL;
3071
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003072 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003073 to_s = PyString_AS_STRING(to);
3074 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003076#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003078 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003079 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003080#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003081 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 return NULL;
3083
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003085 from_s, from_len,
3086 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087}
3088
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003089/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003091/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003092 * against substr, using the start and end arguments. Returns
3093 * -1 on error, 0 if not found and 1 if found.
3094 */
3095Py_LOCAL(int)
3096_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3097 Py_ssize_t end, int direction)
3098{
3099 Py_ssize_t len = PyString_GET_SIZE(self);
3100 Py_ssize_t slen;
3101 const char* sub;
3102 const char* str;
3103
3104 if (PyString_Check(substr)) {
3105 sub = PyString_AS_STRING(substr);
3106 slen = PyString_GET_SIZE(substr);
3107 }
3108#ifdef Py_USING_UNICODE
3109 else if (PyUnicode_Check(substr))
3110 return PyUnicode_Tailmatch((PyObject *)self,
3111 substr, start, end, direction);
3112#endif
3113 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3114 return -1;
3115 str = PyString_AS_STRING(self);
3116
3117 string_adjust_indices(&start, &end, len);
3118
3119 if (direction < 0) {
3120 /* startswith */
3121 if (start+slen > len)
3122 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003123 } else {
3124 /* endswith */
3125 if (end-start < slen || start > len)
3126 return 0;
3127
3128 if (end-slen > start)
3129 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003130 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003131 if (end-start >= slen)
3132 return ! memcmp(str+start, sub, slen);
3133 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003134}
3135
3136
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003137PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003138"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003140Return True if S starts with the specified prefix, False otherwise.\n\
3141With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003142With optional end, stop comparing S at that position.\n\
3143prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144
3145static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003146string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003148 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003149 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003151 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152
Guido van Rossumc6821402000-05-08 14:08:05 +00003153 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3154 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003156 if (PyTuple_Check(subobj)) {
3157 Py_ssize_t i;
3158 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3159 result = _string_tailmatch(self,
3160 PyTuple_GET_ITEM(subobj, i),
3161 start, end, -1);
3162 if (result == -1)
3163 return NULL;
3164 else if (result) {
3165 Py_RETURN_TRUE;
3166 }
3167 }
3168 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 }
Georg Brandl24250812006-06-09 18:45:48 +00003170 result = _string_tailmatch(self, subobj, start, end, -1);
3171 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003173 else
Georg Brandl24250812006-06-09 18:45:48 +00003174 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003175}
3176
3177
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003178PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003179"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003180\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003181Return True if S ends with the specified suffix, False otherwise.\n\
3182With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003183With optional end, stop comparing S at that position.\n\
3184suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003185
3186static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003187string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003188{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003189 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003190 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003192 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003193
Guido van Rossumc6821402000-05-08 14:08:05 +00003194 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3195 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003197 if (PyTuple_Check(subobj)) {
3198 Py_ssize_t i;
3199 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3200 result = _string_tailmatch(self,
3201 PyTuple_GET_ITEM(subobj, i),
3202 start, end, +1);
3203 if (result == -1)
3204 return NULL;
3205 else if (result) {
3206 Py_RETURN_TRUE;
3207 }
3208 }
3209 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 }
Georg Brandl24250812006-06-09 18:45:48 +00003211 result = _string_tailmatch(self, subobj, start, end, +1);
3212 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003214 else
Georg Brandl24250812006-06-09 18:45:48 +00003215 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216}
3217
3218
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003219PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003220"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003221\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003222Encodes S using the codec registered for encoding. encoding defaults\n\
3223to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003224handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003225a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3226'xmlcharrefreplace' as well as any other name registered with\n\
3227codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003228
3229static PyObject *
3230string_encode(PyStringObject *self, PyObject *args)
3231{
3232 char *encoding = NULL;
3233 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003234 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003235
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003236 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3237 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003238 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003239 if (v == NULL)
3240 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003241 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3242 PyErr_Format(PyExc_TypeError,
3243 "encoder did not return a string/unicode object "
3244 "(type=%.400s)",
3245 v->ob_type->tp_name);
3246 Py_DECREF(v);
3247 return NULL;
3248 }
3249 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003250
3251 onError:
3252 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003253}
3254
3255
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003256PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003257"S.decode([encoding[,errors]]) -> object\n\
3258\n\
3259Decodes S using the codec registered for encoding. encoding defaults\n\
3260to the default encoding. errors may be given to set a different error\n\
3261handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003262a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3263as well as any other name registerd with codecs.register_error that is\n\
3264able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003265
3266static PyObject *
3267string_decode(PyStringObject *self, PyObject *args)
3268{
3269 char *encoding = NULL;
3270 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003271 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003272
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003273 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3274 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003275 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003276 if (v == NULL)
3277 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003278 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3279 PyErr_Format(PyExc_TypeError,
3280 "decoder did not return a string/unicode object "
3281 "(type=%.400s)",
3282 v->ob_type->tp_name);
3283 Py_DECREF(v);
3284 return NULL;
3285 }
3286 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003287
3288 onError:
3289 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003290}
3291
3292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003293PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003294"S.expandtabs([tabsize]) -> string\n\
3295\n\
3296Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003297If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003298
3299static PyObject*
3300string_expandtabs(PyStringObject *self, PyObject *args)
3301{
Guido van Rossum44a93e52008-03-11 21:14:54 +00003302 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003303 char *q;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003304 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003305 PyObject *u;
3306 int tabsize = 8;
3307
3308 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3309 return NULL;
3310
Thomas Wouters7e474022000-07-16 12:04:32 +00003311 /* First pass: determine size of output string */
Guido van Rossum44a93e52008-03-11 21:14:54 +00003312 i = 0; /* chars up to and including most recent \n or \r */
3313 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3314 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003315 for (p = PyString_AS_STRING(self); p < e; p++)
3316 if (*p == '\t') {
Neal Norwitz66e64e22007-06-09 04:06:30 +00003317 if (tabsize > 0) {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003318 incr = tabsize - (j % tabsize);
3319 if (j > PY_SSIZE_T_MAX - incr)
3320 goto overflow1;
3321 j += incr;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003322 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003323 }
3324 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003325 if (j > PY_SSIZE_T_MAX - 1)
3326 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003327 j++;
3328 if (*p == '\n' || *p == '\r') {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003329 if (i > PY_SSIZE_T_MAX - j)
3330 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003331 i += j;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003332 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003333 }
3334 }
3335
Guido van Rossum44a93e52008-03-11 21:14:54 +00003336 if (i > PY_SSIZE_T_MAX - j)
3337 goto overflow1;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003338
Guido van Rossum4c08d552000-03-10 22:55:18 +00003339 /* Second pass: create output string and fill it */
3340 u = PyString_FromStringAndSize(NULL, i + j);
3341 if (!u)
3342 return NULL;
3343
Guido van Rossum44a93e52008-03-11 21:14:54 +00003344 j = 0; /* same as in first pass */
3345 q = PyString_AS_STRING(u); /* next output char */
3346 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003347
3348 for (p = PyString_AS_STRING(self); p < e; p++)
3349 if (*p == '\t') {
3350 if (tabsize > 0) {
3351 i = tabsize - (j % tabsize);
3352 j += i;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003353 while (i--) {
3354 if (q >= qe)
3355 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003356 *q++ = ' ';
Guido van Rossum44a93e52008-03-11 21:14:54 +00003357 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003358 }
3359 }
3360 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003361 if (q >= qe)
3362 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003363 *q++ = *p;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003364 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003365 if (*p == '\n' || *p == '\r')
3366 j = 0;
3367 }
3368
3369 return u;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003370
3371 overflow2:
3372 Py_DECREF(u);
3373 overflow1:
3374 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3375 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003376}
3377
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003378Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003379pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003380{
3381 PyObject *u;
3382
3383 if (left < 0)
3384 left = 0;
3385 if (right < 0)
3386 right = 0;
3387
Tim Peters8fa5dd02001-09-12 02:18:30 +00003388 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003389 Py_INCREF(self);
3390 return (PyObject *)self;
3391 }
3392
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003393 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003394 left + PyString_GET_SIZE(self) + right);
3395 if (u) {
3396 if (left)
3397 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003398 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003399 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003400 PyString_GET_SIZE(self));
3401 if (right)
3402 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3403 fill, right);
3404 }
3405
3406 return u;
3407}
3408
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003409PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003410"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003411"\n"
3412"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003413"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414
3415static PyObject *
3416string_ljust(PyStringObject *self, PyObject *args)
3417{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003418 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003419 char fillchar = ' ';
3420
Thomas Wouters4abb3662006-04-19 14:50:15 +00003421 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422 return NULL;
3423
Tim Peters8fa5dd02001-09-12 02:18:30 +00003424 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425 Py_INCREF(self);
3426 return (PyObject*) self;
3427 }
3428
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430}
3431
3432
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003433PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003434"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003435"\n"
3436"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003437"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438
3439static PyObject *
3440string_rjust(PyStringObject *self, PyObject *args)
3441{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003442 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003443 char fillchar = ' ';
3444
Thomas Wouters4abb3662006-04-19 14:50:15 +00003445 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003446 return NULL;
3447
Tim Peters8fa5dd02001-09-12 02:18:30 +00003448 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003449 Py_INCREF(self);
3450 return (PyObject*) self;
3451 }
3452
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003453 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003454}
3455
3456
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003457PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003458"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003459"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003460"Return S centered in a string of length width. Padding is\n"
3461"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003462
3463static PyObject *
3464string_center(PyStringObject *self, PyObject *args)
3465{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003466 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003467 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003468 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469
Thomas Wouters4abb3662006-04-19 14:50:15 +00003470 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003471 return NULL;
3472
Tim Peters8fa5dd02001-09-12 02:18:30 +00003473 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474 Py_INCREF(self);
3475 return (PyObject*) self;
3476 }
3477
3478 marg = width - PyString_GET_SIZE(self);
3479 left = marg / 2 + (marg & width & 1);
3480
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003481 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003482}
3483
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003484PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003485"S.zfill(width) -> string\n"
3486"\n"
3487"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003488"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003489
3490static PyObject *
3491string_zfill(PyStringObject *self, PyObject *args)
3492{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003493 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003494 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003495 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003496 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003497
Thomas Wouters4abb3662006-04-19 14:50:15 +00003498 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003499 return NULL;
3500
3501 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003502 if (PyString_CheckExact(self)) {
3503 Py_INCREF(self);
3504 return (PyObject*) self;
3505 }
3506 else
3507 return PyString_FromStringAndSize(
3508 PyString_AS_STRING(self),
3509 PyString_GET_SIZE(self)
3510 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003511 }
3512
3513 fill = width - PyString_GET_SIZE(self);
3514
3515 s = pad(self, fill, 0, '0');
3516
3517 if (s == NULL)
3518 return NULL;
3519
3520 p = PyString_AS_STRING(s);
3521 if (p[fill] == '+' || p[fill] == '-') {
3522 /* move sign to beginning of string */
3523 p[0] = p[fill];
3524 p[fill] = '0';
3525 }
3526
3527 return (PyObject*) s;
3528}
3529
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003530PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003531"S.isspace() -> bool\n\
3532\n\
3533Return True if all characters in S are whitespace\n\
3534and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003535
3536static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003537string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003538{
Fred Drakeba096332000-07-09 07:04:36 +00003539 register const unsigned char *p
3540 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003541 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542
Guido van Rossum4c08d552000-03-10 22:55:18 +00003543 /* Shortcut for single character strings */
3544 if (PyString_GET_SIZE(self) == 1 &&
3545 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003546 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003547
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003548 /* Special case for empty strings */
3549 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003551
Guido van Rossum4c08d552000-03-10 22:55:18 +00003552 e = p + PyString_GET_SIZE(self);
3553 for (; p < e; p++) {
3554 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003555 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003556 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003557 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558}
3559
3560
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003561PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003562"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003563\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003564Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003565and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003566
3567static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003568string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003569{
Fred Drakeba096332000-07-09 07:04:36 +00003570 register const unsigned char *p
3571 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003572 register const unsigned char *e;
3573
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574 /* Shortcut for single character strings */
3575 if (PyString_GET_SIZE(self) == 1 &&
3576 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003578
3579 /* Special case for empty strings */
3580 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582
3583 e = p + PyString_GET_SIZE(self);
3584 for (; p < e; p++) {
3585 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589}
3590
3591
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003592PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003595Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003596and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597
3598static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003599string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003600{
Fred Drakeba096332000-07-09 07:04:36 +00003601 register const unsigned char *p
3602 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003603 register const unsigned char *e;
3604
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605 /* Shortcut for single character strings */
3606 if (PyString_GET_SIZE(self) == 1 &&
3607 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003609
3610 /* Special case for empty strings */
3611 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003613
3614 e = p + PyString_GET_SIZE(self);
3615 for (; p < e; p++) {
3616 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003618 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003620}
3621
3622
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003623PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003626Return True if all characters in S are digits\n\
3627and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628
3629static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003630string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631{
Fred Drakeba096332000-07-09 07:04:36 +00003632 register const unsigned char *p
3633 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003634 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635
Guido van Rossum4c08d552000-03-10 22:55:18 +00003636 /* Shortcut for single character strings */
3637 if (PyString_GET_SIZE(self) == 1 &&
3638 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003639 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003641 /* Special case for empty strings */
3642 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003644
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645 e = p + PyString_GET_SIZE(self);
3646 for (; p < e; p++) {
3647 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651}
3652
3653
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003654PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003655"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003658at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659
3660static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003661string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662{
Fred Drakeba096332000-07-09 07:04:36 +00003663 register const unsigned char *p
3664 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003665 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666 int cased;
3667
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 /* Shortcut for single character strings */
3669 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003672 /* Special case for empty strings */
3673 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003675
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 e = p + PyString_GET_SIZE(self);
3677 cased = 0;
3678 for (; p < e; p++) {
3679 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681 else if (!cased && islower(*p))
3682 cased = 1;
3683 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685}
3686
3687
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003688PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003689"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003690\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003691Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003692at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693
3694static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003695string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696{
Fred Drakeba096332000-07-09 07:04:36 +00003697 register const unsigned char *p
3698 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003699 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700 int cased;
3701
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 /* Shortcut for single character strings */
3703 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003704 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003706 /* Special case for empty strings */
3707 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003709
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710 e = p + PyString_GET_SIZE(self);
3711 cased = 0;
3712 for (; p < e; p++) {
3713 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003714 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715 else if (!cased && isupper(*p))
3716 cased = 1;
3717 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719}
3720
3721
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003722PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003725Return True if S is a titlecased string and there is at least one\n\
3726character in S, i.e. uppercase characters may only follow uncased\n\
3727characters and lowercase characters only cased ones. Return False\n\
3728otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729
3730static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003731string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003732{
Fred Drakeba096332000-07-09 07:04:36 +00003733 register const unsigned char *p
3734 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003735 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736 int cased, previous_is_cased;
3737
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738 /* Shortcut for single character strings */
3739 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003740 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003742 /* Special case for empty strings */
3743 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003744 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003745
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 e = p + PyString_GET_SIZE(self);
3747 cased = 0;
3748 previous_is_cased = 0;
3749 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003750 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751
3752 if (isupper(ch)) {
3753 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755 previous_is_cased = 1;
3756 cased = 1;
3757 }
3758 else if (islower(ch)) {
3759 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003760 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761 previous_is_cased = 1;
3762 cased = 1;
3763 }
3764 else
3765 previous_is_cased = 0;
3766 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003767 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003768}
3769
3770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003771PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003772"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773\n\
3774Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003775Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003776is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778static PyObject*
3779string_splitlines(PyStringObject *self, PyObject *args)
3780{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003781 register Py_ssize_t i;
3782 register Py_ssize_t j;
3783 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003784 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003785 PyObject *list;
3786 PyObject *str;
3787 char *data;
3788
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003789 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003790 return NULL;
3791
3792 data = PyString_AS_STRING(self);
3793 len = PyString_GET_SIZE(self);
3794
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003795 /* This does not use the preallocated list because splitlines is
3796 usually run with hundreds of newlines. The overhead of
3797 switching between PyList_SET_ITEM and append causes about a
3798 2-3% slowdown for that common case. A smarter implementation
3799 could move the if check out, so the SET_ITEMs are done first
3800 and the appends only done when the prealloc buffer is full.
3801 That's too much work for little gain.*/
3802
Guido van Rossum4c08d552000-03-10 22:55:18 +00003803 list = PyList_New(0);
3804 if (!list)
3805 goto onError;
3806
3807 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003808 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003809
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 /* Find a line and append it */
3811 while (i < len && data[i] != '\n' && data[i] != '\r')
3812 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813
3814 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003815 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816 if (i < len) {
3817 if (data[i] == '\r' && i + 1 < len &&
3818 data[i+1] == '\n')
3819 i += 2;
3820 else
3821 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003822 if (keepends)
3823 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003824 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003825 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 j = i;
3827 }
3828 if (j < len) {
3829 SPLIT_APPEND(data, j, len);
3830 }
3831
3832 return list;
3833
3834 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003835 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836 return NULL;
3837}
3838
3839#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003840#undef SPLIT_ADD
3841#undef MAX_PREALLOC
3842#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003843
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003844static PyObject *
3845string_getnewargs(PyStringObject *v)
3846{
3847 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3848}
3849
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003850
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003851static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003852string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003853 /* Counterparts of the obsolete stropmodule functions; except
3854 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003855 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3856 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003857 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003858 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3859 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003860 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3861 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3862 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3863 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3864 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3865 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3866 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003867 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3868 capitalize__doc__},
3869 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3870 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3871 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003872 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003873 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3874 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3875 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3876 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3877 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3878 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3879 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003880 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3881 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003882 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3883 startswith__doc__},
3884 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3885 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3886 swapcase__doc__},
3887 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3888 translate__doc__},
3889 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3890 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3891 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3892 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3893 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3894 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3895 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3896 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3897 expandtabs__doc__},
3898 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3899 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003900 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003901 {NULL, NULL} /* sentinel */
3902};
3903
Jeremy Hylton938ace62002-07-17 16:30:39 +00003904static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003905str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3906
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003907static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003908string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003909{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003910 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003911 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003912
Guido van Rossumae960af2001-08-30 03:11:59 +00003913 if (type != &PyString_Type)
3914 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003915 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3916 return NULL;
3917 if (x == NULL)
3918 return PyString_FromString("");
3919 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003920}
3921
Guido van Rossumae960af2001-08-30 03:11:59 +00003922static PyObject *
3923str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3924{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003925 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003926 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003927
3928 assert(PyType_IsSubtype(type, &PyString_Type));
3929 tmp = string_new(&PyString_Type, args, kwds);
3930 if (tmp == NULL)
3931 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003932 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003933 n = PyString_GET_SIZE(tmp);
3934 pnew = type->tp_alloc(type, n);
3935 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003936 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003937 ((PyStringObject *)pnew)->ob_shash =
3938 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003939 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003940 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003941 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003942 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003943}
3944
Guido van Rossumcacfc072002-05-24 19:01:59 +00003945static PyObject *
3946basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3947{
3948 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003949 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003950 return NULL;
3951}
3952
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003953static PyObject *
3954string_mod(PyObject *v, PyObject *w)
3955{
3956 if (!PyString_Check(v)) {
3957 Py_INCREF(Py_NotImplemented);
3958 return Py_NotImplemented;
3959 }
3960 return PyString_Format(v, w);
3961}
3962
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003963PyDoc_STRVAR(basestring_doc,
3964"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003965
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003966static PyNumberMethods string_as_number = {
3967 0, /*nb_add*/
3968 0, /*nb_subtract*/
3969 0, /*nb_multiply*/
3970 0, /*nb_divide*/
3971 string_mod, /*nb_remainder*/
3972};
3973
3974
Guido van Rossumcacfc072002-05-24 19:01:59 +00003975PyTypeObject PyBaseString_Type = {
3976 PyObject_HEAD_INIT(&PyType_Type)
3977 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003978 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003979 0,
3980 0,
3981 0, /* tp_dealloc */
3982 0, /* tp_print */
3983 0, /* tp_getattr */
3984 0, /* tp_setattr */
3985 0, /* tp_compare */
3986 0, /* tp_repr */
3987 0, /* tp_as_number */
3988 0, /* tp_as_sequence */
3989 0, /* tp_as_mapping */
3990 0, /* tp_hash */
3991 0, /* tp_call */
3992 0, /* tp_str */
3993 0, /* tp_getattro */
3994 0, /* tp_setattro */
3995 0, /* tp_as_buffer */
3996 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3997 basestring_doc, /* tp_doc */
3998 0, /* tp_traverse */
3999 0, /* tp_clear */
4000 0, /* tp_richcompare */
4001 0, /* tp_weaklistoffset */
4002 0, /* tp_iter */
4003 0, /* tp_iternext */
4004 0, /* tp_methods */
4005 0, /* tp_members */
4006 0, /* tp_getset */
4007 &PyBaseObject_Type, /* tp_base */
4008 0, /* tp_dict */
4009 0, /* tp_descr_get */
4010 0, /* tp_descr_set */
4011 0, /* tp_dictoffset */
4012 0, /* tp_init */
4013 0, /* tp_alloc */
4014 basestring_new, /* tp_new */
4015 0, /* tp_free */
4016};
4017
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004018PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004019"str(object) -> string\n\
4020\n\
4021Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004022If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004023
/* Type object for the built-in "str" type.  Slots are positional; the
   trailing comment on each line names the PyTypeObject slot being
   filled.  &PyBaseString_Type is installed as tp_base. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE,                    /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4067
4068void
Fred Drakeba096332000-07-09 07:04:36 +00004069PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004070{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004072 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004073 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004074 if (w == NULL || !PyString_Check(*pv)) {
4075 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004076 *pv = NULL;
4077 return;
4078 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 v = string_concat((PyStringObject *) *pv, w);
4080 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004081 *pv = v;
4082}
4083
Guido van Rossum013142a1994-08-30 08:19:36 +00004084void
Fred Drakeba096332000-07-09 07:04:36 +00004085PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004086{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004087 PyString_Concat(pv, w);
4088 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004089}
4090
4091
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092/* The following function breaks the notion that strings are immutable:
4093 it changes the size of a string. We get away with this only if there
4094 is only one module referencing the object. You can also think of it
4095 as creating a new string object and destroying the old one, only
4096 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004097 already be known to some other part of the code...
4098 Note that if there's not enough memory to resize the string, the original
4099 string object at *pv is deallocated, *pv is set to NULL, an "out of
4100 memory" exception is set, and -1 is returned. Else (on success) 0 is
4101 returned, and the value in *pv may or may not be the same as on input.
4102 As always, an extra byte is allocated for a trailing \0 byte (newsize
4103 does *not* include that), and a trailing \0 byte is stored.
4104*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004105
4106int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004107_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004108{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004109 register PyObject *v;
4110 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004111 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004112 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4113 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004114 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004115 Py_DECREF(v);
4116 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004117 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004118 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004119 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004120 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004121 _Py_ForgetReference(v);
4122 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004123 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004124 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004125 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004126 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004127 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004128 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004129 _Py_NewReference(*pv);
4130 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004131 sv->ob_size = newsize;
4132 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004133 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004134 return 0;
4135}
Guido van Rossume5372401993-03-16 12:15:04 +00004136
4137/* Helpers for formatstring */
4138
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004139Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004140getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004141{
Thomas Wouters977485d2006-02-16 15:59:12 +00004142 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004143 if (argidx < arglen) {
4144 (*p_argidx)++;
4145 if (arglen < 0)
4146 return args;
4147 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004148 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004150 PyErr_SetString(PyExc_TypeError,
4151 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004152 return NULL;
4153}
4154
/* Format flag bits.  PyString_Format() ORs one of these into its
 * `flags` word for each flag character found in a conversion
 * specification:
 *   F_LJUST    '-'
 *   F_SIGN     '+'
 *   F_BLANK    ' '
 *   F_ALT      '#'
 *   F_ZERO     '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)

4167
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004168Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004169formatfloat(char *buf, size_t buflen, int flags,
4170 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004171{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004172 /* fmt = '%#.' + `prec` + `type`
4173 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004174 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004175 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004176 x = PyFloat_AsDouble(v);
4177 if (x == -1.0 && PyErr_Occurred()) {
4178 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004179 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004180 }
Guido van Rossume5372401993-03-16 12:15:04 +00004181 if (prec < 0)
4182 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004183 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4184 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004185 /* Worst case length calc to ensure no buffer overrun:
4186
4187 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004188 fmt = %#.<prec>g
4189 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004190 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004191 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004192
4193 'f' formats:
4194 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4195 len = 1 + 50 + 1 + prec = 52 + prec
4196
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004197 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004198 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004199
4200 */
Georg Brandlc5db9232007-07-12 08:38:04 +00004201 if (((type == 'g' || type == 'G') &&
4202 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004203 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004204 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004205 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004206 return -1;
4207 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004208 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4209 (flags&F_ALT) ? "#" : "",
4210 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004211 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004212 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004213}
4214
Tim Peters38fd5b62000-09-21 05:43:11 +00004215/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4216 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4217 * Python's regular ints.
4218 * Return value: a new PyString*, or NULL if error.
4219 * . *pbuf is set to point into it,
4220 * *plen set to the # of chars following that.
4221 * Caller must decref it when done using pbuf.
4222 * The string starting at *pbuf is of the form
4223 * "-"? ("0x" | "0X")? digit+
4224 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004225 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004226 * There will be at least prec digits, zero-filled on the left if
4227 * necessary to get that many.
4228 * val object to be converted
4229 * flags bitmask of format flags; only F_ALT is looked at
4230 * prec minimum number of digits; 0-fill on left if needed
4231 * type a character in [duoxX]; u acts the same as d
4232 *
4233 * CAUTION: o, x and X conversions on regular ints can never
4234 * produce a '-' sign, but can for Python's unbounded ints.
4235 */
4236PyObject*
4237_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4238 char **pbuf, int *plen)
4239{
4240 PyObject *result = NULL;
4241 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004242 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004243 int sign; /* 1 if '-', else 0 */
4244 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004245 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004246 int numdigits; /* len == numnondigits + numdigits */
4247 int numnondigits = 0;
4248
4249 switch (type) {
4250 case 'd':
4251 case 'u':
4252 result = val->ob_type->tp_str(val);
4253 break;
4254 case 'o':
4255 result = val->ob_type->tp_as_number->nb_oct(val);
4256 break;
4257 case 'x':
4258 case 'X':
4259 numnondigits = 2;
4260 result = val->ob_type->tp_as_number->nb_hex(val);
4261 break;
4262 default:
4263 assert(!"'type' not in [duoxX]");
4264 }
4265 if (!result)
4266 return NULL;
4267
Neal Norwitz56423e52006-08-13 18:11:08 +00004268 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004269 if (!buf) {
4270 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004271 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004272 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004273
Tim Peters38fd5b62000-09-21 05:43:11 +00004274 /* To modify the string in-place, there can only be one reference. */
4275 if (result->ob_refcnt != 1) {
4276 PyErr_BadInternalCall();
4277 return NULL;
4278 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004279 llen = PyString_Size(result);
Armin Rigo4b63c212006-10-04 11:44:06 +00004280 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004281 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4282 return NULL;
4283 }
4284 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004285 if (buf[len-1] == 'L') {
4286 --len;
4287 buf[len] = '\0';
4288 }
4289 sign = buf[0] == '-';
4290 numnondigits += sign;
4291 numdigits = len - numnondigits;
4292 assert(numdigits > 0);
4293
Tim Petersfff53252001-04-12 18:38:48 +00004294 /* Get rid of base marker unless F_ALT */
4295 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004296 /* Need to skip 0x, 0X or 0. */
4297 int skipped = 0;
4298 switch (type) {
4299 case 'o':
4300 assert(buf[sign] == '0');
4301 /* If 0 is only digit, leave it alone. */
4302 if (numdigits > 1) {
4303 skipped = 1;
4304 --numdigits;
4305 }
4306 break;
4307 case 'x':
4308 case 'X':
4309 assert(buf[sign] == '0');
4310 assert(buf[sign + 1] == 'x');
4311 skipped = 2;
4312 numnondigits -= 2;
4313 break;
4314 }
4315 if (skipped) {
4316 buf += skipped;
4317 len -= skipped;
4318 if (sign)
4319 buf[0] = '-';
4320 }
4321 assert(len == numnondigits + numdigits);
4322 assert(numdigits > 0);
4323 }
4324
4325 /* Fill with leading zeroes to meet minimum width. */
4326 if (prec > numdigits) {
4327 PyObject *r1 = PyString_FromStringAndSize(NULL,
4328 numnondigits + prec);
4329 char *b1;
4330 if (!r1) {
4331 Py_DECREF(result);
4332 return NULL;
4333 }
4334 b1 = PyString_AS_STRING(r1);
4335 for (i = 0; i < numnondigits; ++i)
4336 *b1++ = *buf++;
4337 for (i = 0; i < prec - numdigits; i++)
4338 *b1++ = '0';
4339 for (i = 0; i < numdigits; i++)
4340 *b1++ = *buf++;
4341 *b1 = '\0';
4342 Py_DECREF(result);
4343 result = r1;
4344 buf = PyString_AS_STRING(result);
4345 len = numnondigits + prec;
4346 }
4347
4348 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004349 if (type == 'X') {
4350 /* Need to convert all lower case letters to upper case.
4351 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004352 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004353 if (buf[i] >= 'a' && buf[i] <= 'x')
4354 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004355 }
4356 *pbuf = buf;
4357 *plen = len;
4358 return result;
4359}
4360
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004361Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004362formatint(char *buf, size_t buflen, int flags,
4363 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004364{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004365 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004366 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4367 + 1 + 1 = 24 */
4368 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004369 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004370 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004371
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004372 x = PyInt_AsLong(v);
4373 if (x == -1 && PyErr_Occurred()) {
4374 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004375 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004376 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004377 if (x < 0 && type == 'u') {
4378 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004379 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004380 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4381 sign = "-";
4382 else
4383 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004384 if (prec < 0)
4385 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004386
4387 if ((flags & F_ALT) &&
4388 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004389 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004390 * of issues that cause pain:
4391 * - when 0 is being converted, the C standard leaves off
4392 * the '0x' or '0X', which is inconsistent with other
4393 * %#x/%#X conversions and inconsistent with Python's
4394 * hex() function
4395 * - there are platforms that violate the standard and
4396 * convert 0 with the '0x' or '0X'
4397 * (Metrowerks, Compaq Tru64)
4398 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004399 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004400 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004401 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004402 * We can achieve the desired consistency by inserting our
4403 * own '0x' or '0X' prefix, and substituting %x/%X in place
4404 * of %#x/%#X.
4405 *
4406 * Note that this is the same approach as used in
4407 * formatint() in unicodeobject.c
4408 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004409 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4410 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004411 }
4412 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004413 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4414 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004415 prec, type);
4416 }
4417
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004418 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4419 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004420 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004421 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004422 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004423 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004424 return -1;
4425 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004426 if (sign[0])
4427 PyOS_snprintf(buf, buflen, fmt, -x);
4428 else
4429 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004430 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004431}
4432
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004433Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004434formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004435{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004436 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004437 if (PyString_Check(v)) {
4438 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004439 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004440 }
4441 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004442 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004443 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004444 }
4445 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004446 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004447}
4448
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length, in bytes, of the scratch buffer
   (formatbuf in PyString_Format) in which the floats, ints, & chars
   are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120

Guido van Rossume5372401993-03-16 12:15:04 +00004458
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004459PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004460PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004461{
4462 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004463 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004464 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004465 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004466 PyObject *result, *orig_args;
4467#ifdef Py_USING_UNICODE
4468 PyObject *v, *w;
4469#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004470 PyObject *dict = NULL;
4471 if (format == NULL || !PyString_Check(format) || args == NULL) {
4472 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004473 return NULL;
4474 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004475 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004476 fmt = PyString_AS_STRING(format);
4477 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004478 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004479 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004480 if (result == NULL)
4481 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004482 res = PyString_AsString(result);
4483 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004484 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004485 argidx = 0;
4486 }
4487 else {
4488 arglen = -1;
4489 argidx = -2;
4490 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004491 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4492 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004493 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004494 while (--fmtcnt >= 0) {
4495 if (*fmt != '%') {
4496 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004497 rescnt = fmtcnt + 100;
4498 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004499 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004500 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004501 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004502 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004503 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004504 }
4505 *res++ = *fmt++;
4506 }
4507 else {
4508 /* Got a format specifier */
4509 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004510 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004511 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004512 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004513 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004514 PyObject *v = NULL;
4515 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004516 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004517 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004518 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004519 char formatbuf[FORMATBUFLEN];
4520 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004521#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004522 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004523 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004524#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004525
Guido van Rossumda9c2711996-12-05 21:58:58 +00004526 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004527 if (*fmt == '(') {
4528 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004529 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004530 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004531 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004532
4533 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004534 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004535 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004536 goto error;
4537 }
4538 ++fmt;
4539 --fmtcnt;
4540 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004541 /* Skip over balanced parentheses */
4542 while (pcount > 0 && --fmtcnt >= 0) {
4543 if (*fmt == ')')
4544 --pcount;
4545 else if (*fmt == '(')
4546 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004547 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004548 }
4549 keylen = fmt - keystart - 1;
4550 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004551 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004552 "incomplete format key");
4553 goto error;
4554 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004555 key = PyString_FromStringAndSize(keystart,
4556 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004557 if (key == NULL)
4558 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004559 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004560 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004561 args_owned = 0;
4562 }
4563 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004564 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004565 if (args == NULL) {
4566 goto error;
4567 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004568 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004569 arglen = -1;
4570 argidx = -2;
4571 }
Guido van Rossume5372401993-03-16 12:15:04 +00004572 while (--fmtcnt >= 0) {
4573 switch (c = *fmt++) {
4574 case '-': flags |= F_LJUST; continue;
4575 case '+': flags |= F_SIGN; continue;
4576 case ' ': flags |= F_BLANK; continue;
4577 case '#': flags |= F_ALT; continue;
4578 case '0': flags |= F_ZERO; continue;
4579 }
4580 break;
4581 }
4582 if (c == '*') {
4583 v = getnextarg(args, arglen, &argidx);
4584 if (v == NULL)
4585 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004586 if (!PyInt_Check(v)) {
4587 PyErr_SetString(PyExc_TypeError,
4588 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004589 goto error;
4590 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004591 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004592 if (width < 0) {
4593 flags |= F_LJUST;
4594 width = -width;
4595 }
Guido van Rossume5372401993-03-16 12:15:04 +00004596 if (--fmtcnt >= 0)
4597 c = *fmt++;
4598 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004599 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004600 width = c - '0';
4601 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004602 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004603 if (!isdigit(c))
4604 break;
4605 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004606 PyErr_SetString(
4607 PyExc_ValueError,
4608 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004609 goto error;
4610 }
4611 width = width*10 + (c - '0');
4612 }
4613 }
4614 if (c == '.') {
4615 prec = 0;
4616 if (--fmtcnt >= 0)
4617 c = *fmt++;
4618 if (c == '*') {
4619 v = getnextarg(args, arglen, &argidx);
4620 if (v == NULL)
4621 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004622 if (!PyInt_Check(v)) {
4623 PyErr_SetString(
4624 PyExc_TypeError,
4625 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004626 goto error;
4627 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004628 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004629 if (prec < 0)
4630 prec = 0;
4631 if (--fmtcnt >= 0)
4632 c = *fmt++;
4633 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004634 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004635 prec = c - '0';
4636 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004637 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004638 if (!isdigit(c))
4639 break;
4640 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004641 PyErr_SetString(
4642 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004643 "prec too big");
4644 goto error;
4645 }
4646 prec = prec*10 + (c - '0');
4647 }
4648 }
4649 } /* prec */
4650 if (fmtcnt >= 0) {
4651 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004652 if (--fmtcnt >= 0)
4653 c = *fmt++;
4654 }
4655 }
4656 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004657 PyErr_SetString(PyExc_ValueError,
4658 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004659 goto error;
4660 }
4661 if (c != '%') {
4662 v = getnextarg(args, arglen, &argidx);
4663 if (v == NULL)
4664 goto error;
4665 }
4666 sign = 0;
4667 fill = ' ';
4668 switch (c) {
4669 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004670 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004671 len = 1;
4672 break;
4673 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004674#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004675 if (PyUnicode_Check(v)) {
4676 fmt = fmt_start;
4677 argidx = argidx_start;
4678 goto unicode;
4679 }
Georg Brandld45014b2005-10-01 17:06:00 +00004680#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004681 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004682#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004683 if (temp != NULL && PyUnicode_Check(temp)) {
4684 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004685 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004686 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004687 goto unicode;
4688 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004689#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004690 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004691 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004692 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004693 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004694 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004695 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004696 if (!PyString_Check(temp)) {
4697 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004698 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004699 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004700 goto error;
4701 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004702 pbuf = PyString_AS_STRING(temp);
4703 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004704 if (prec >= 0 && len > prec)
4705 len = prec;
4706 break;
4707 case 'i':
4708 case 'd':
4709 case 'u':
4710 case 'o':
4711 case 'x':
4712 case 'X':
4713 if (c == 'i')
4714 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004715 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004716 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004717 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004718 prec, c, &pbuf, &ilen);
4719 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004720 if (!temp)
4721 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004722 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004723 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004724 else {
4725 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004726 len = formatint(pbuf,
4727 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004728 flags, prec, c, v);
4729 if (len < 0)
4730 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004731 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004732 }
4733 if (flags & F_ZERO)
4734 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004735 break;
4736 case 'e':
4737 case 'E':
4738 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004739 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004740 case 'g':
4741 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004742 if (c == 'F')
4743 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004744 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004745 len = formatfloat(pbuf, sizeof(formatbuf),
4746 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004747 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004748 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004749 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004750 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004751 fill = '0';
4752 break;
4753 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004754#ifdef Py_USING_UNICODE
4755 if (PyUnicode_Check(v)) {
4756 fmt = fmt_start;
4757 argidx = argidx_start;
4758 goto unicode;
4759 }
4760#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004761 pbuf = formatbuf;
4762 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004763 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004764 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004765 break;
4766 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004767 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004768 "unsupported format character '%c' (0x%x) "
Armin Rigo4b63c212006-10-04 11:44:06 +00004769 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004770 c, c,
Armin Rigo4b63c212006-10-04 11:44:06 +00004771 (Py_ssize_t)(fmt - 1 -
4772 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004773 goto error;
4774 }
4775 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004776 if (*pbuf == '-' || *pbuf == '+') {
4777 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004778 len--;
4779 }
4780 else if (flags & F_SIGN)
4781 sign = '+';
4782 else if (flags & F_BLANK)
4783 sign = ' ';
4784 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004785 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004786 }
4787 if (width < len)
4788 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004789 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004790 reslen -= rescnt;
4791 rescnt = width + fmtcnt + 100;
4792 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004793 if (reslen < 0) {
4794 Py_DECREF(result);
Georg Brandl5f795862007-02-26 13:51:34 +00004795 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004796 return PyErr_NoMemory();
4797 }
Georg Brandl5f795862007-02-26 13:51:34 +00004798 if (_PyString_Resize(&result, reslen) < 0) {
4799 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004800 return NULL;
Georg Brandl5f795862007-02-26 13:51:34 +00004801 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004802 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004803 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004804 }
4805 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004806 if (fill != ' ')
4807 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004808 rescnt--;
4809 if (width > len)
4810 width--;
4811 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004812 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4813 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004814 assert(pbuf[1] == c);
4815 if (fill != ' ') {
4816 *res++ = *pbuf++;
4817 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004818 }
Tim Petersfff53252001-04-12 18:38:48 +00004819 rescnt -= 2;
4820 width -= 2;
4821 if (width < 0)
4822 width = 0;
4823 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004824 }
4825 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004826 do {
4827 --rescnt;
4828 *res++ = fill;
4829 } while (--width > len);
4830 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004831 if (fill == ' ') {
4832 if (sign)
4833 *res++ = sign;
4834 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004835 (c == 'x' || c == 'X')) {
4836 assert(pbuf[0] == '0');
4837 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004838 *res++ = *pbuf++;
4839 *res++ = *pbuf++;
4840 }
4841 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004842 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004843 res += len;
4844 rescnt -= len;
4845 while (--width >= len) {
4846 --rescnt;
4847 *res++ = ' ';
4848 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004849 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004850 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004851 "not all arguments converted during string formatting");
Georg Brandl5f795862007-02-26 13:51:34 +00004852 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004853 goto error;
4854 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004855 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004856 } /* '%' */
4857 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004858 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004859 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004860 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004861 goto error;
4862 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004863 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004864 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004865 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004866 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004867 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004868
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004869#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004870 unicode:
4871 if (args_owned) {
4872 Py_DECREF(args);
4873 args_owned = 0;
4874 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004875 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004876 if (PyTuple_Check(orig_args) && argidx > 0) {
4877 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004878 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004879 v = PyTuple_New(n);
4880 if (v == NULL)
4881 goto error;
4882 while (--n >= 0) {
4883 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4884 Py_INCREF(w);
4885 PyTuple_SET_ITEM(v, n, w);
4886 }
4887 args = v;
4888 } else {
4889 Py_INCREF(orig_args);
4890 args = orig_args;
4891 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004892 args_owned = 1;
4893 /* Take what we have of the result and let the Unicode formatting
4894 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004895 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004896 if (_PyString_Resize(&result, rescnt))
4897 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004898 fmtcnt = PyString_GET_SIZE(format) - \
4899 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004900 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4901 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004902 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004903 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004904 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004905 if (v == NULL)
4906 goto error;
4907 /* Paste what we have (result) to what the Unicode formatting
4908 function returned (v) and return the result (or error) */
4909 w = PyUnicode_Concat(result, v);
4910 Py_DECREF(result);
4911 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004912 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004913 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004914#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004915
Guido van Rossume5372401993-03-16 12:15:04 +00004916 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004917 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004918 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004919 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004920 }
Guido van Rossume5372401993-03-16 12:15:04 +00004921 return NULL;
4922}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004923
Guido van Rossum2a61e741997-01-18 07:55:05 +00004924void
Fred Drakeba096332000-07-09 07:04:36 +00004925PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004926{
4927 register PyStringObject *s = (PyStringObject *)(*p);
4928 PyObject *t;
4929 if (s == NULL || !PyString_Check(s))
4930 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004931 /* If it's a string subclass, we don't really know what putting
4932 it in the interned dict might do. */
4933 if (!PyString_CheckExact(s))
4934 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004935 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004936 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004937 if (interned == NULL) {
4938 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004939 if (interned == NULL) {
4940 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004941 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004942 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004943 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004944 t = PyDict_GetItem(interned, (PyObject *)s);
4945 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004946 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004947 Py_DECREF(*p);
4948 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004949 return;
4950 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004951
Armin Rigo79f7ad22004-08-07 19:27:39 +00004952 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004953 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004954 return;
4955 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004956 /* The two references in interned are not counted by refcnt.
4957 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004958 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004959 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004960}
4961
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004962void
4963PyString_InternImmortal(PyObject **p)
4964{
4965 PyString_InternInPlace(p);
4966 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4967 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4968 Py_INCREF(*p);
4969 }
4970}
4971
Guido van Rossum2a61e741997-01-18 07:55:05 +00004972
4973PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004974PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004975{
4976 PyObject *s = PyString_FromString(cp);
4977 if (s == NULL)
4978 return NULL;
4979 PyString_InternInPlace(&s);
4980 return s;
4981}
4982
Guido van Rossum8cf04761997-08-02 02:57:45 +00004983void
Fred Drakeba096332000-07-09 07:04:36 +00004984PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004985{
4986 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004987 for (i = 0; i < UCHAR_MAX + 1; i++) {
4988 Py_XDECREF(characters[i]);
4989 characters[i] = NULL;
4990 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004991 Py_XDECREF(nullstring);
4992 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004993}
Barry Warsawa903ad982001-02-23 16:40:48 +00004994
Barry Warsawa903ad982001-02-23 16:40:48 +00004995void _Py_ReleaseInternedStrings(void)
4996{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004997 PyObject *keys;
4998 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004999 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005000
5001 if (interned == NULL || !PyDict_Check(interned))
5002 return;
5003 keys = PyDict_Keys(interned);
5004 if (keys == NULL || !PyList_Check(keys)) {
5005 PyErr_Clear();
5006 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005007 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005008
5009 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5010 detector, interned strings are not forcibly deallocated; rather, we
5011 give them their stolen references back, and then clear and DECREF
5012 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005013
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005014 fprintf(stderr, "releasing interned strings\n");
5015 n = PyList_GET_SIZE(keys);
5016 for (i = 0; i < n; i++) {
5017 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5018 switch (s->ob_sstate) {
5019 case SSTATE_NOT_INTERNED:
5020 /* XXX Shouldn't happen */
5021 break;
5022 case SSTATE_INTERNED_IMMORTAL:
5023 s->ob_refcnt += 1;
5024 break;
5025 case SSTATE_INTERNED_MORTAL:
5026 s->ob_refcnt += 2;
5027 break;
5028 default:
5029 Py_FatalError("Inconsistent interned string state.");
5030 }
5031 s->ob_sstate = SSTATE_NOT_INTERNED;
5032 }
5033 Py_DECREF(keys);
5034 PyDict_Clear(interned);
5035 Py_DECREF(interned);
5036 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005037}