blob: 110c38ee01eea8d0aa7593049b2c00955778fdc4 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
26
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000027/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000028 For both PyString_FromString() and PyString_FromStringAndSize(), the
29 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000030 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000031
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000032 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000033 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000034
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000035 For PyString_FromStringAndSize(), the parameter the parameter `str' is
36 either NULL or else points to a string containing at least `size' bytes.
37 For PyString_FromStringAndSize(), the string in the `str' parameter does
38 not have to be null-terminated. (Therefore it is safe to construct a
39 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
40 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
41 bytes (setting the last byte to the null terminating character) and you can
42 fill in the data yourself. If `str' is non-NULL then the resulting
43 PyString object must be treated as immutable and you must not fill in nor
44 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000045
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000046 The PyObject member `op->ob_size', which denotes the number of "extra
47 items" in a variable-size object, will contain the number of bytes
48 allocated for string data, not counting the null terminating character. It
49 is therefore equal to the equal to the `size' parameter (for
50 PyString_FromStringAndSize()) or the length of the string in the `str'
51 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000052*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Thomas Wouters477c8d52006-05-27 19:21:47 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000275 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Thomas Wouters477c8d52006-05-27 19:21:47 +0000693/* -------------------------------------------------------------------- */
694/* object api */
695
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697string_getsize(register PyObject *op)
698{
699 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000700 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000701 if (PyString_AsStringAndSize(op, &s, &len))
702 return -1;
703 return len;
704}
705
706static /*const*/ char *
707string_getbuffer(register PyObject *op)
708{
709 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000710 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000711 if (PyString_AsStringAndSize(op, &s, &len))
712 return NULL;
713 return s;
714}
715
Martin v. Löwis18e16552006-02-15 17:27:45 +0000716Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000717PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (!PyString_Check(op))
720 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722}
723
724/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000725PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 if (!PyString_Check(op))
728 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730}
731
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732int
733PyString_AsStringAndSize(register PyObject *obj,
734 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000735 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000736{
737 if (s == NULL) {
738 PyErr_BadInternalCall();
739 return -1;
740 }
741
742 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000743#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000744 if (PyUnicode_Check(obj)) {
745 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
746 if (obj == NULL)
747 return -1;
748 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000749 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000750#endif
751 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000752 PyErr_Format(PyExc_TypeError,
753 "expected string or Unicode object, "
754 "%.200s found", obj->ob_type->tp_name);
755 return -1;
756 }
757 }
758
759 *s = PyString_AS_STRING(obj);
760 if (len != NULL)
761 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000762 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000763 PyErr_SetString(PyExc_TypeError,
764 "expected string without null bytes");
765 return -1;
766 }
767 return 0;
768}
769
Thomas Wouters477c8d52006-05-27 19:21:47 +0000770/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771/* Methods */
772
Thomas Wouters477c8d52006-05-27 19:21:47 +0000773#define STRINGLIB_CHAR char
774
775#define STRINGLIB_CMP memcmp
776#define STRINGLIB_LEN PyString_GET_SIZE
777#define STRINGLIB_NEW PyString_FromStringAndSize
778#define STRINGLIB_STR PyString_AS_STRING
779
780#define STRINGLIB_EMPTY nullstring
781
782#include "stringlib/fastsearch.h"
783
784#include "stringlib/count.h"
785#include "stringlib/find.h"
786#include "stringlib/partition.h"
787
788
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000789static int
Fred Drakeba096332000-07-09 07:04:36 +0000790string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000792 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000795
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000796 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000797 if (! PyString_CheckExact(op)) {
798 int ret;
799 /* A str subclass may have its own __str__ method. */
800 op = (PyStringObject *) PyObject_Str((PyObject *)op);
801 if (op == NULL)
802 return -1;
803 ret = string_print(op, fp, flags);
804 Py_DECREF(op);
805 return ret;
806 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000807 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000808#ifdef __VMS
809 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
810#else
811 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
812#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000813 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815
Thomas Wouters7e474022000-07-16 12:04:32 +0000816 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000818 if (memchr(op->ob_sval, '\'', op->ob_size) &&
819 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 quote = '"';
821
822 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823 for (i = 0; i < op->ob_size; i++) {
824 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000825 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000827 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000828 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000829 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000830 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000831 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000832 fprintf(fp, "\\r");
833 else if (c < ' ' || c >= 0x7f)
834 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000835 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000836 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000838 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000839 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000840}
841
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000842PyObject *
843PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000845 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000846 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000847 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000848 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000849 PyErr_SetString(PyExc_OverflowError,
850 "string is too large to make repr");
851 }
852 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000854 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
856 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000857 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000858 register char c;
859 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 int quote;
861
Thomas Wouters7e474022000-07-16 12:04:32 +0000862 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000863 quote = '\'';
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000864 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000865 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000866 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000867 quote = '"';
868
Tim Peters9161c8b2001-12-03 01:55:38 +0000869 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000870 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000872 /* There's at least enough room for a hex escape
873 and a closing quote. */
874 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000876 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000878 else if (c == '\t')
879 *p++ = '\\', *p++ = 't';
880 else if (c == '\n')
881 *p++ = '\\', *p++ = 'n';
882 else if (c == '\r')
883 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000884 else if (c < ' ' || c >= 0x7f) {
885 /* For performance, we don't want to call
886 PyOS_snprintf here (extra layers of
887 function call). */
888 sprintf(p, "\\x%02x", c & 0xff);
889 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000890 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000891 else
892 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000894 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000895 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000897 _PyString_Resize(
Thomas Woutersd4ec0c32006-04-21 16:44:05 +0000898 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000899 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901}
902
Guido van Rossum189f1df2001-05-01 16:51:53 +0000903static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000904string_repr(PyObject *op)
905{
906 return PyString_Repr(op, 1);
907}
908
909static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000910string_str(PyObject *s)
911{
Tim Petersc9933152001-10-16 20:18:24 +0000912 assert(PyString_Check(s));
913 if (PyString_CheckExact(s)) {
914 Py_INCREF(s);
915 return s;
916 }
917 else {
918 /* Subtype -- return genuine string with the same value. */
919 PyStringObject *t = (PyStringObject *) s;
920 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
921 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000922}
923
Martin v. Löwis18e16552006-02-15 17:27:45 +0000924static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000925string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000926{
927 return a->ob_size;
928}
929
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000931string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000932{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000933 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 register PyStringObject *op;
935 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000936#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000937 if (PyUnicode_Check(bb))
938 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000939#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000940 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000941 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000942 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943 return NULL;
944 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000947 if ((a->ob_size == 0 || b->ob_size == 0) &&
948 PyString_CheckExact(a) && PyString_CheckExact(b)) {
949 if (a->ob_size == 0) {
950 Py_INCREF(bb);
951 return bb;
952 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000953 Py_INCREF(a);
954 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000955 }
956 size = a->ob_size + b->ob_size;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000957 if (size < 0) {
958 PyErr_SetString(PyExc_OverflowError,
959 "strings are too large to concat");
960 return NULL;
961 }
962
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000963 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000964 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000965 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000966 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000967 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000968 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000969 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000970 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
971 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000972 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000973 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974#undef b
975}
976
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000978string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000980 register Py_ssize_t i;
981 register Py_ssize_t j;
982 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000984 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985 if (n < 0)
986 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000987 /* watch out for overflows: the size can overflow int,
988 * and the # of bytes needed can overflow size_t
989 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000991 if (n && size / n != a->ob_size) {
992 PyErr_SetString(PyExc_OverflowError,
993 "repeated string is too long");
994 return NULL;
995 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000996 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 Py_INCREF(a);
998 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 }
Tim Peterse7c05322004-06-27 17:24:49 +00001000 nbytes = (size_t)size;
1001 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001002 PyErr_SetString(PyExc_OverflowError,
1003 "repeated string is too long");
1004 return NULL;
1005 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001007 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001008 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001009 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001010 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001011 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001012 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001013 op->ob_sval[size] = '\0';
1014 if (a->ob_size == 1 && n > 0) {
1015 memset(op->ob_sval, a->ob_sval[0] , n);
1016 return (PyObject *) op;
1017 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001018 i = 0;
1019 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001020 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1021 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001022 }
1023 while (i < size) {
1024 j = (i <= size-i) ? i : size-i;
1025 memcpy(op->ob_sval+i, op->ob_sval, j);
1026 i += j;
1027 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001029}
1030
1031/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1032
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001033static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001034string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001035 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001036 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001037{
1038 if (i < 0)
1039 i = 0;
1040 if (j < 0)
1041 j = 0; /* Avoid signed/unsigned bug in next line */
1042 if (j > a->ob_size)
1043 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001044 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1045 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 Py_INCREF(a);
1047 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 }
1049 if (j < i)
1050 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001051 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001052}
1053
Guido van Rossum9284a572000-03-07 15:53:43 +00001054static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001055string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001056{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001057 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001058#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00001059 if (PyUnicode_Check(sub_obj))
1060 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001061#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001062 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001063 PyErr_SetString(PyExc_TypeError,
1064 "'in <string>' requires string as left operand");
1065 return -1;
1066 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001067 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001068
Thomas Wouters477c8d52006-05-27 19:21:47 +00001069 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001070}
1071
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001072static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001075 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001077 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001078 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001079 return NULL;
1080 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001081 pchar = a->ob_sval[i];
1082 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001083 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001084 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001085 else {
1086#ifdef COUNT_ALLOCS
1087 one_strings++;
1088#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001089 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001090 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001091 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092}
1093
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094static PyObject*
1095string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001097 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001098 Py_ssize_t len_a, len_b;
1099 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 PyObject *result;
1101
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001102 /* Make sure both arguments are strings. */
1103 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001104 result = Py_NotImplemented;
1105 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001106 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001107 if (a == b) {
1108 switch (op) {
1109 case Py_EQ:case Py_LE:case Py_GE:
1110 result = Py_True;
1111 goto out;
1112 case Py_NE:case Py_LT:case Py_GT:
1113 result = Py_False;
1114 goto out;
1115 }
1116 }
1117 if (op == Py_EQ) {
1118 /* Supporting Py_NE here as well does not save
1119 much time, since Py_NE is rarely used. */
1120 if (a->ob_size == b->ob_size
1121 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001122 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 a->ob_size) == 0)) {
1124 result = Py_True;
1125 } else {
1126 result = Py_False;
1127 }
1128 goto out;
1129 }
1130 len_a = a->ob_size; len_b = b->ob_size;
1131 min_len = (len_a < len_b) ? len_a : len_b;
1132 if (min_len > 0) {
1133 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1134 if (c==0)
1135 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1136 }else
1137 c = 0;
1138 if (c == 0)
1139 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1140 switch (op) {
1141 case Py_LT: c = c < 0; break;
1142 case Py_LE: c = c <= 0; break;
1143 case Py_EQ: assert(0); break; /* unreachable */
1144 case Py_NE: c = c != 0; break;
1145 case Py_GT: c = c > 0; break;
1146 case Py_GE: c = c >= 0; break;
1147 default:
1148 result = Py_NotImplemented;
1149 goto out;
1150 }
1151 result = c ? Py_True : Py_False;
1152 out:
1153 Py_INCREF(result);
1154 return result;
1155}
1156
1157int
1158_PyString_Eq(PyObject *o1, PyObject *o2)
1159{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001160 PyStringObject *a = (PyStringObject*) o1;
1161 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001162 return a->ob_size == b->ob_size
1163 && *a->ob_sval == *b->ob_sval
1164 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001165}
1166
Guido van Rossum9bfef441993-03-29 10:43:31 +00001167static long
Fred Drakeba096332000-07-09 07:04:36 +00001168string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001169{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001170 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001171 register unsigned char *p;
1172 register long x;
1173
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001174 if (a->ob_shash != -1)
1175 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001176 len = a->ob_size;
1177 p = (unsigned char *) a->ob_sval;
1178 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001179 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001180 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001181 x ^= a->ob_size;
1182 if (x == -1)
1183 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001185 return x;
1186}
1187
/* True when the type of `o` has the Py_TPFLAGS_HAVE_INDEX feature flag. */
#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1189
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001190static PyObject*
1191string_subscript(PyStringObject* self, PyObject* item)
1192{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001193 PyNumberMethods *nb = item->ob_type->tp_as_number;
1194 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1195 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001196 if (i == -1 && PyErr_Occurred())
1197 return NULL;
1198 if (i < 0)
1199 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001200 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201 }
1202 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001203 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 char* source_buf;
1205 char* result_buf;
1206 PyObject* result;
1207
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001208 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 PyString_GET_SIZE(self),
1210 &start, &stop, &step, &slicelength) < 0) {
1211 return NULL;
1212 }
1213
1214 if (slicelength <= 0) {
1215 return PyString_FromStringAndSize("", 0);
1216 }
1217 else {
1218 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001219 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001220 if (result_buf == NULL)
1221 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001222
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001223 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224 cur += step, i++) {
1225 result_buf[i] = source_buf[cur];
1226 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001227
1228 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001229 slicelength);
1230 PyMem_Free(result_buf);
1231 return result;
1232 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001233 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 else {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001235 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001236 "string indices must be integers");
1237 return NULL;
1238 }
1239}
1240
Martin v. Löwis18e16552006-02-15 17:27:45 +00001241static Py_ssize_t
1242string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001243{
1244 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001245 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001246 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001247 return -1;
1248 }
1249 *ptr = (void *)self->ob_sval;
1250 return self->ob_size;
1251}
1252
Martin v. Löwis18e16552006-02-15 17:27:45 +00001253static Py_ssize_t
1254string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001255{
Guido van Rossum045e6881997-09-08 18:30:11 +00001256 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001257 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258 return -1;
1259}
1260
Martin v. Löwis18e16552006-02-15 17:27:45 +00001261static Py_ssize_t
1262string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263{
1264 if ( lenp )
1265 *lenp = self->ob_size;
1266 return 1;
1267}
1268
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269static Py_ssize_t
1270string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001271{
1272 if ( index != 0 ) {
1273 PyErr_SetString(PyExc_SystemError,
1274 "accessing non-existent string segment");
1275 return -1;
1276 }
1277 *ptr = self->ob_sval;
1278 return self->ob_size;
1279}
1280
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001281static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001282 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001283 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284 (ssizeargfunc)string_repeat, /*sq_repeat*/
1285 (ssizeargfunc)string_item, /*sq_item*/
1286 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001287 0, /*sq_ass_item*/
1288 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001289 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001290};
1291
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001292static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001294 (binaryfunc)string_subscript,
1295 0,
1296};
1297
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001298static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001299 (readbufferproc)string_buffer_getreadbuf,
1300 (writebufferproc)string_buffer_getwritebuf,
1301 (segcountproc)string_buffer_getsegcount,
1302 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001303};
1304
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305
1306
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the selectors above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001315
Thomas Wouters477c8d52006-05-27 19:21:47 +00001316
/* Compare `length` bytes of `target`, starting at `offset`, with `pattern`.
   The first and last bytes are tested before the interior is handed to
   memcmp.  Don't call if length < 2 (the interior length would be
   negative). */
#define Py_STRING_MATCH(target, offset, pattern, length)	\
  (target[offset] == pattern[0] &&				\
   target[offset+length-1] == pattern[length-1] &&		\
   !memcmp(target+offset+1, pattern+1, length-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a given split limit: maxsplit splits produce
   maxsplit+1 elements (5 splits gives 6), capped at MAX_PREALLOC. */
#define PREALLOC_SIZE(maxsplit) \
	(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)

/* Append data[left:right] to `list` as a new string.  Expects `str`,
   `list` and an `onError` label in the enclosing function. */
#define SPLIT_APPEND(data, left, right)				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (PyList_Append(list, str)) {				\
		Py_DECREF(str);					\
		goto onError;					\
	}							\
	else							\
		Py_DECREF(str);

/* Add data[left:right] to `list`: stored directly into slot `count`
   while count < MAX_PREALLOC, appended afterwards.  Increments `count`.
   Expects `str`, `list`, `count` and an `onError` label in the enclosing
   function. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count < MAX_PREALLOC) {				\
		PyList_SET_ITEM(list, count, str);		\
	} else {						\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		else						\
			Py_DECREF(str);				\
	}							\
	count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count;

/* Index-advancing scanners: the forward forms stop at `len`, the reverse
   forms stop once `i` drops below 0. */
#define SKIP_SPACE(s, i, len)    { while (i<len &&  isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0  &&  isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0  && !isspace(Py_CHARMASK(s[i]))) i--; }
1373
1374Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001375split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001377 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001378 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001379 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380
1381 if (list == NULL)
1382 return NULL;
1383
Thomas Wouters477c8d52006-05-27 19:21:47 +00001384 i = j = 0;
1385
1386 while (maxsplit-- > 0) {
1387 SKIP_SPACE(s, i, len);
1388 if (i==len) break;
1389 j = i; i++;
1390 SKIP_NONSPACE(s, i, len);
1391 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001393
1394 if (i < len) {
1395 /* Only occurs when maxsplit was reached */
1396 /* Skip any remaining whitespace and copy to end of string */
1397 SKIP_SPACE(s, i, len);
1398 if (i != len)
1399 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001401 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001403 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404 Py_DECREF(list);
1405 return NULL;
1406}
1407
Thomas Wouters477c8d52006-05-27 19:21:47 +00001408Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001409split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001410{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001411 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001412 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001413 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414
1415 if (list == NULL)
1416 return NULL;
1417
Thomas Wouters477c8d52006-05-27 19:21:47 +00001418 i = j = 0;
1419 while ((j < len) && (maxcount-- > 0)) {
1420 for(; j<len; j++) {
1421 /* I found that using memchr makes no difference */
1422 if (s[j] == ch) {
1423 SPLIT_ADD(s, i, j);
1424 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001426 }
1427 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001428 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001429 if (i <= len) {
1430 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001431 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001432 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001433 return list;
1434
1435 onError:
1436 Py_DECREF(list);
1437 return NULL;
1438}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001440PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441"S.split([sep [,maxsplit]]) -> list of strings\n\
1442\n\
1443Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001444delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001445splits are done. If sep is not specified or is None, any\n\
1446whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
1448static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001449string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001451 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001452 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001453 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001454 PyObject *list, *str, *subobj = Py_None;
1455#ifdef USE_FAST
1456 Py_ssize_t pos;
1457#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001461 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001462 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001463 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001464 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001465 if (PyString_Check(subobj)) {
1466 sub = PyString_AS_STRING(subobj);
1467 n = PyString_GET_SIZE(subobj);
1468 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001469#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001470 else if (PyUnicode_Check(subobj))
1471 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001472#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1474 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001475
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476 if (n == 0) {
1477 PyErr_SetString(PyExc_ValueError, "empty separator");
1478 return NULL;
1479 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001480 else if (n == 1)
1481 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001482
Thomas Wouters477c8d52006-05-27 19:21:47 +00001483 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 if (list == NULL)
1485 return NULL;
1486
Thomas Wouters477c8d52006-05-27 19:21:47 +00001487#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001489 while (maxsplit-- > 0) {
1490 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1491 if (pos < 0)
1492 break;
1493 j = i+pos;
1494 SPLIT_ADD(s, i, j);
1495 i = j + n;
1496
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001498#else
1499 i = j = 0;
1500 while ((j+n <= len) && (maxsplit-- > 0)) {
1501 for (; j+n <= len; j++) {
1502 if (Py_STRING_MATCH(s, j, sub, n)) {
1503 SPLIT_ADD(s, i, j);
1504 i = j = j + n;
1505 break;
1506 }
1507 }
1508 }
1509#endif
1510 SPLIT_ADD(s, i, len);
1511 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512 return list;
1513
Thomas Wouters477c8d52006-05-27 19:21:47 +00001514 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515 Py_DECREF(list);
1516 return NULL;
1517}
1518
Thomas Wouters477c8d52006-05-27 19:21:47 +00001519PyDoc_STRVAR(partition__doc__,
1520"S.partition(sep) -> (head, sep, tail)\n\
1521\n\
1522Searches for the separator sep in S, and returns the part before it,\n\
1523the separator itself, and the part after it. If the separator is not\n\
1524found, returns S and two empty strings.");
1525
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001526static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001527string_partition(PyStringObject *self, PyObject *sep_obj)
1528{
1529 const char *sep;
1530 Py_ssize_t sep_len;
1531
1532 if (PyString_Check(sep_obj)) {
1533 sep = PyString_AS_STRING(sep_obj);
1534 sep_len = PyString_GET_SIZE(sep_obj);
1535 }
1536#ifdef Py_USING_UNICODE
1537 else if (PyUnicode_Check(sep_obj))
1538 return PyUnicode_Partition((PyObject *) self, sep_obj);
1539#endif
1540 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1541 return NULL;
1542
1543 return stringlib_partition(
1544 (PyObject*) self,
1545 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1546 sep_obj, sep, sep_len
1547 );
1548}
1549
1550PyDoc_STRVAR(rpartition__doc__,
1551"S.rpartition(sep) -> (head, sep, tail)\n\
1552\n\
1553Searches for the separator sep in S, starting at the end of S, and returns\n\
1554the part before it, the separator itself, and the part after it. If the\n\
1555separator is not found, returns S and two empty strings.");
1556
1557static PyObject *
1558string_rpartition(PyStringObject *self, PyObject *sep_obj)
1559{
1560 const char *sep;
1561 Py_ssize_t sep_len;
1562
1563 if (PyString_Check(sep_obj)) {
1564 sep = PyString_AS_STRING(sep_obj);
1565 sep_len = PyString_GET_SIZE(sep_obj);
1566 }
1567#ifdef Py_USING_UNICODE
1568 else if (PyUnicode_Check(sep_obj))
1569 return PyUnicode_Partition((PyObject *) self, sep_obj);
1570#endif
1571 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1572 return NULL;
1573
1574 return stringlib_rpartition(
1575 (PyObject*) self,
1576 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1577 sep_obj, sep, sep_len
1578 );
1579}
1580
1581Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001582rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001583{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001584 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001585 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001586 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001587
1588 if (list == NULL)
1589 return NULL;
1590
Thomas Wouters477c8d52006-05-27 19:21:47 +00001591 i = j = len-1;
1592
1593 while (maxsplit-- > 0) {
1594 RSKIP_SPACE(s, i);
1595 if (i<0) break;
1596 j = i; i--;
1597 RSKIP_NONSPACE(s, i);
1598 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001599 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001600 if (i >= 0) {
1601 /* Only occurs when maxsplit was reached */
1602 /* Skip any remaining whitespace and copy to beginning of string */
1603 RSKIP_SPACE(s, i);
1604 if (i >= 0)
1605 SPLIT_ADD(s, 0, i + 1);
1606
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001607 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001608 FIX_PREALLOC_SIZE(list);
1609 if (PyList_Reverse(list) < 0)
1610 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001611 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001612 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001613 Py_DECREF(list);
1614 return NULL;
1615}
1616
Thomas Wouters477c8d52006-05-27 19:21:47 +00001617Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001618rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001619{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001620 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001621 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001622 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001623
1624 if (list == NULL)
1625 return NULL;
1626
Thomas Wouters477c8d52006-05-27 19:21:47 +00001627 i = j = len - 1;
1628 while ((i >= 0) && (maxcount-- > 0)) {
1629 for (; i >= 0; i--) {
1630 if (s[i] == ch) {
1631 SPLIT_ADD(s, i + 1, j + 1);
1632 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001634 }
1635 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001636 }
1637 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001638 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001639 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001640 FIX_PREALLOC_SIZE(list);
1641 if (PyList_Reverse(list) < 0)
1642 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 return list;
1644
1645 onError:
1646 Py_DECREF(list);
1647 return NULL;
1648}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001649
1650PyDoc_STRVAR(rsplit__doc__,
1651"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1652\n\
1653Return a list of the words in the string S, using sep as the\n\
1654delimiter string, starting at the end of the string and working\n\
1655to the front. If maxsplit is given, at most maxsplit splits are\n\
1656done. If sep is not specified or is None, any whitespace string\n\
1657is a separator.");
1658
1659static PyObject *
1660string_rsplit(PyStringObject *self, PyObject *args)
1661{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001662 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001663 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001664 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001665 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001667 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001668 return NULL;
1669 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001670 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001671 if (subobj == Py_None)
1672 return rsplit_whitespace(s, len, maxsplit);
1673 if (PyString_Check(subobj)) {
1674 sub = PyString_AS_STRING(subobj);
1675 n = PyString_GET_SIZE(subobj);
1676 }
1677#ifdef Py_USING_UNICODE
1678 else if (PyUnicode_Check(subobj))
1679 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1680#endif
1681 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1682 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001683
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001684 if (n == 0) {
1685 PyErr_SetString(PyExc_ValueError, "empty separator");
1686 return NULL;
1687 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001688 else if (n == 1)
1689 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001690
Thomas Wouters477c8d52006-05-27 19:21:47 +00001691 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001692 if (list == NULL)
1693 return NULL;
1694
1695 j = len;
1696 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001697
Thomas Wouters477c8d52006-05-27 19:21:47 +00001698 while ( (i >= 0) && (maxsplit-- > 0) ) {
1699 for (; i>=0; i--) {
1700 if (Py_STRING_MATCH(s, i, sub, n)) {
1701 SPLIT_ADD(s, i + n, j);
1702 j = i;
1703 i -= n;
1704 break;
1705 }
1706 }
1707 }
1708 SPLIT_ADD(s, 0, j);
1709 FIX_PREALLOC_SIZE(list);
1710 if (PyList_Reverse(list) < 0)
1711 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001712 return list;
1713
Thomas Wouters477c8d52006-05-27 19:21:47 +00001714onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001715 Py_DECREF(list);
1716 return NULL;
1717}
1718
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001720PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721"S.join(sequence) -> string\n\
1722\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001723Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001724sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725
1726static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001727string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728{
1729 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001730 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001733 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001734 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001735 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001736 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737
Tim Peters19fe14e2001-01-19 03:03:47 +00001738 seq = PySequence_Fast(orig, "");
1739 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001740 return NULL;
1741 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001742
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001743 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 if (seqlen == 0) {
1745 Py_DECREF(seq);
1746 return PyString_FromString("");
1747 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001749 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001750 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1751 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001753 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001754 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001756
Raymond Hettinger674f2412004-08-23 23:23:54 +00001757 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001758 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001759 * Do a pre-pass to figure out the total amount of space we'll
1760 * need (sz), see whether any argument is absurd, and defer to
1761 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001763 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001765 item = PySequence_Fast_GET_ITEM(seq, i);
1766 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001767#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001768 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001769 /* Defer to Unicode join.
1770 * CAUTION: There's no gurantee that the
1771 * original sequence can be iterated over
1772 * again, so we must pass seq here.
1773 */
1774 PyObject *result;
1775 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001776 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001777 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001779#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001780 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001781 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001782 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001783 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001784 Py_DECREF(seq);
1785 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001786 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001787 sz += PyString_GET_SIZE(item);
1788 if (i != 0)
1789 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001790 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 PyErr_SetString(PyExc_OverflowError,
1792 "join() is too long for a Python string");
1793 Py_DECREF(seq);
1794 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001796 }
1797
1798 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001799 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001800 if (res == NULL) {
1801 Py_DECREF(seq);
1802 return NULL;
1803 }
1804
1805 /* Catenate everything. */
1806 p = PyString_AS_STRING(res);
1807 for (i = 0; i < seqlen; ++i) {
1808 size_t n;
1809 item = PySequence_Fast_GET_ITEM(seq, i);
1810 n = PyString_GET_SIZE(item);
1811 memcpy(p, PyString_AS_STRING(item), n);
1812 p += n;
1813 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001814 memcpy(p, sep, seplen);
1815 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001816 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001818
Jeremy Hylton49048292000-07-11 03:28:17 +00001819 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821}
1822
Tim Peters52e155e2001-06-16 05:42:57 +00001823PyObject *
1824_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001825{
Tim Petersa7259592001-06-16 05:11:17 +00001826 assert(sep != NULL && PyString_Check(sep));
1827 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001828 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001829}
1830
Thomas Wouters477c8d52006-05-27 19:21:47 +00001831Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001832string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001833{
1834 if (*end > len)
1835 *end = len;
1836 else if (*end < 0)
1837 *end += len;
1838 if (*end < 0)
1839 *end = 0;
1840 if (*start < 0)
1841 *start += len;
1842 if (*start < 0)
1843 *start = 0;
1844}
1845
Thomas Wouters477c8d52006-05-27 19:21:47 +00001846Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001847string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001849 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001850 const char *sub;
1851 Py_ssize_t sub_len;
1852 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853
Martin v. Löwis18e16552006-02-15 17:27:45 +00001854 /* XXX ssize_t i */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001855 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1856 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 return -2;
1858 if (PyString_Check(subobj)) {
1859 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001860 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001861 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001862#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001863 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001864 return PyUnicode_Find(
1865 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001866#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001867 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868 return -2;
1869
Thomas Wouters477c8d52006-05-27 19:21:47 +00001870 if (dir > 0)
1871 return stringlib_find_slice(
1872 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1873 sub, sub_len, start, end);
1874 else
1875 return stringlib_rfind_slice(
1876 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1877 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878}
1879
1880
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001881PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882"S.find(sub [,start [,end]]) -> int\n\
1883\n\
1884Return the lowest index in S where substring sub is found,\n\
1885such that sub is contained within s[start,end]. Optional\n\
1886arguments start and end are interpreted as in slice notation.\n\
1887\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001888Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889
1890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001891string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001893 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894 if (result == -2)
1895 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001896 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897}
1898
1899
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001900PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901"S.index(sub [,start [,end]]) -> int\n\
1902\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001903Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904
1905static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001906string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001908 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 if (result == -2)
1910 return NULL;
1911 if (result == -1) {
1912 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001913 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914 return NULL;
1915 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001916 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917}
1918
1919
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001920PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921"S.rfind(sub [,start [,end]]) -> int\n\
1922\n\
1923Return the highest index in S where substring sub is found,\n\
1924such that sub is contained within s[start,end]. Optional\n\
1925arguments start and end are interpreted as in slice notation.\n\
1926\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001927Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928
1929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001930string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001932 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933 if (result == -2)
1934 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001935 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936}
1937
1938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940"S.rindex(sub [,start [,end]]) -> int\n\
1941\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001942Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943
1944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001945string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001947 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948 if (result == -2)
1949 return NULL;
1950 if (result == -1) {
1951 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001952 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 return NULL;
1954 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001955 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956}
1957
1958
Thomas Wouters477c8d52006-05-27 19:21:47 +00001959Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001960do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1961{
1962 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001963 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001965 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1966 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001967
1968 i = 0;
1969 if (striptype != RIGHTSTRIP) {
1970 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1971 i++;
1972 }
1973 }
1974
1975 j = len;
1976 if (striptype != LEFTSTRIP) {
1977 do {
1978 j--;
1979 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1980 j++;
1981 }
1982
1983 if (i == 0 && j == len && PyString_CheckExact(self)) {
1984 Py_INCREF(self);
1985 return (PyObject*)self;
1986 }
1987 else
1988 return PyString_FromStringAndSize(s+i, j-i);
1989}
1990
1991
Thomas Wouters477c8d52006-05-27 19:21:47 +00001992Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001993do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994{
1995 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001996 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998 i = 0;
1999 if (striptype != RIGHTSTRIP) {
2000 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2001 i++;
2002 }
2003 }
2004
2005 j = len;
2006 if (striptype != LEFTSTRIP) {
2007 do {
2008 j--;
2009 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2010 j++;
2011 }
2012
Tim Peters8fa5dd02001-09-12 02:18:30 +00002013 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014 Py_INCREF(self);
2015 return (PyObject*)self;
2016 }
2017 else
2018 return PyString_FromStringAndSize(s+i, j-i);
2019}
2020
2021
Thomas Wouters477c8d52006-05-27 19:21:47 +00002022Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002023do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2024{
2025 PyObject *sep = NULL;
2026
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002027 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002028 return NULL;
2029
2030 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002031 if (PyString_Check(sep))
2032 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002033#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002034 else if (PyUnicode_Check(sep)) {
2035 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2036 PyObject *res;
2037 if (uniself==NULL)
2038 return NULL;
2039 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2040 striptype, sep);
2041 Py_DECREF(uniself);
2042 return res;
2043 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002044#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002045 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002046#ifdef Py_USING_UNICODE
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002047 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002048#else
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002049 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002050#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002051 STRIPNAME(striptype));
2052 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002053 }
2054
2055 return do_strip(self, striptype);
2056}
2057
2058
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002059PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002060"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061\n\
2062Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002063whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002064If chars is given and not None, remove characters in chars instead.\n\
2065If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066
2067static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002068string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002070 if (PyTuple_GET_SIZE(args) == 0)
2071 return do_strip(self, BOTHSTRIP); /* Common case */
2072 else
2073 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002074}
2075
2076
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002077PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002078"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002080Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002081If chars is given and not None, remove characters in chars instead.\n\
2082If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083
2084static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002085string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002087 if (PyTuple_GET_SIZE(args) == 0)
2088 return do_strip(self, LEFTSTRIP); /* Common case */
2089 else
2090 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091}
2092
2093
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002094PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002095"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002097Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002098If chars is given and not None, remove characters in chars instead.\n\
2099If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100
2101static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002102string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002104 if (PyTuple_GET_SIZE(args) == 0)
2105 return do_strip(self, RIGHTSTRIP); /* Common case */
2106 else
2107 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108}
2109
2110
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002111PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112"S.lower() -> string\n\
2113\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002114Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115
Thomas Wouters477c8d52006-05-27 19:21:47 +00002116/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2117#ifndef _tolower
2118#define _tolower tolower
2119#endif
2120
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002122string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002124 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002125 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002126 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002128 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002129 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002131
2132 s = PyString_AS_STRING(newobj);
2133
2134 memcpy(s, PyString_AS_STRING(self), n);
2135
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002137 int c = Py_CHARMASK(s[i]);
2138 if (isupper(c))
2139 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002141
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002142 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143}
2144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146"S.upper() -> string\n\
2147\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002148Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149
Thomas Wouters477c8d52006-05-27 19:21:47 +00002150#ifndef _toupper
2151#define _toupper toupper
2152#endif
2153
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002155string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002157 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002158 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002159 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002161 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002162 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002164
2165 s = PyString_AS_STRING(newobj);
2166
2167 memcpy(s, PyString_AS_STRING(self), n);
2168
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002170 int c = Py_CHARMASK(s[i]);
2171 if (islower(c))
2172 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002174
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002175 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176}
2177
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002178PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179"S.title() -> string\n\
2180\n\
2181Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002182characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183
2184static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002185string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186{
2187 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002188 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002189 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002190 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002192 newobj = PyString_FromStringAndSize(NULL, n);
2193 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002195 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 for (i = 0; i < n; i++) {
2197 int c = Py_CHARMASK(*s++);
2198 if (islower(c)) {
2199 if (!previous_is_cased)
2200 c = toupper(c);
2201 previous_is_cased = 1;
2202 } else if (isupper(c)) {
2203 if (previous_is_cased)
2204 c = tolower(c);
2205 previous_is_cased = 1;
2206 } else
2207 previous_is_cased = 0;
2208 *s_new++ = c;
2209 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002210 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211}
2212
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214"S.capitalize() -> string\n\
2215\n\
2216Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218
2219static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002220string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221{
2222 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002223 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002224 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002226 newobj = PyString_FromStringAndSize(NULL, n);
2227 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002229 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 if (0 < n) {
2231 int c = Py_CHARMASK(*s++);
2232 if (islower(c))
2233 *s_new = toupper(c);
2234 else
2235 *s_new = c;
2236 s_new++;
2237 }
2238 for (i = 1; i < n; i++) {
2239 int c = Py_CHARMASK(*s++);
2240 if (isupper(c))
2241 *s_new = tolower(c);
2242 else
2243 *s_new = c;
2244 s_new++;
2245 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002246 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247}
2248
2249
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002250PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251"S.count(sub[, start[, end]]) -> int\n\
2252\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002253Return the number of non-overlapping occurrences of substring sub in\n\
2254string S[start:end]. Optional arguments start and end are interpreted\n\
2255as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256
2257static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002258string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002260 PyObject *sub_obj;
2261 const char *str = PyString_AS_STRING(self), *sub;
2262 Py_ssize_t sub_len;
2263 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264
Thomas Wouters477c8d52006-05-27 19:21:47 +00002265 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2266 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002268
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269 if (PyString_Check(sub_obj)) {
2270 sub = PyString_AS_STRING(sub_obj);
2271 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002273#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00002274 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002275 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002276 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002277 if (count == -1)
2278 return NULL;
2279 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002280 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002281 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002282#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00002283 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284 return NULL;
2285
Thomas Wouters477c8d52006-05-27 19:21:47 +00002286 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002287
Thomas Wouters477c8d52006-05-27 19:21:47 +00002288 return PyInt_FromSsize_t(
2289 stringlib_count(str + start, end - start, sub, sub_len)
2290 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291}
2292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002293PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294"S.swapcase() -> string\n\
2295\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002297converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298
2299static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002300string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301{
2302 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002303 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002304 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002306 newobj = PyString_FromStringAndSize(NULL, n);
2307 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002309 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 for (i = 0; i < n; i++) {
2311 int c = Py_CHARMASK(*s++);
2312 if (islower(c)) {
2313 *s_new = toupper(c);
2314 }
2315 else if (isupper(c)) {
2316 *s_new = tolower(c);
2317 }
2318 else
2319 *s_new = c;
2320 s_new++;
2321 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002322 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323}
2324
2325
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002326PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327"S.translate(table [,deletechars]) -> string\n\
2328\n\
2329Return a copy of the string S, where all characters occurring\n\
2330in the optional argument deletechars are removed, and the\n\
2331remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002332translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333
2334static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002335string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 register char *input, *output;
2338 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002339 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002342 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343 PyObject *result;
2344 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002347 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350
2351 if (PyString_Check(tableobj)) {
2352 table1 = PyString_AS_STRING(tableobj);
2353 tablen = PyString_GET_SIZE(tableobj);
2354 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002355#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002357 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358 parameter; instead a mapping to None will cause characters
2359 to be deleted. */
2360 if (delobj != NULL) {
2361 PyErr_SetString(PyExc_TypeError,
2362 "deletions are implemented differently for unicode");
2363 return NULL;
2364 }
2365 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2366 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002367#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002370
Martin v. Löwis00b61272002-12-12 20:03:19 +00002371 if (tablen != 256) {
2372 PyErr_SetString(PyExc_ValueError,
2373 "translation table must be 256 characters long");
2374 return NULL;
2375 }
2376
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 if (delobj != NULL) {
2378 if (PyString_Check(delobj)) {
2379 del_table = PyString_AS_STRING(delobj);
2380 dellen = PyString_GET_SIZE(delobj);
2381 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002382#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383 else if (PyUnicode_Check(delobj)) {
2384 PyErr_SetString(PyExc_TypeError,
2385 "deletions are implemented differently for unicode");
2386 return NULL;
2387 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002388#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002389 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2390 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 }
2392 else {
2393 del_table = NULL;
2394 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002395 }
2396
2397 table = table1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002398 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399 result = PyString_FromStringAndSize((char *)NULL, inlen);
2400 if (result == NULL)
2401 return NULL;
2402 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002403 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404
2405 if (dellen == 0) {
2406 /* If no deletions are required, use faster code */
2407 for (i = inlen; --i >= 0; ) {
2408 c = Py_CHARMASK(*input++);
2409 if (Py_CHARMASK((*output++ = table[c])) != c)
2410 changed = 1;
2411 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002412 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 return result;
2414 Py_DECREF(result);
2415 Py_INCREF(input_obj);
2416 return input_obj;
2417 }
2418
2419 for (i = 0; i < 256; i++)
2420 trans_table[i] = Py_CHARMASK(table[i]);
2421
2422 for (i = 0; i < dellen; i++)
2423 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2424
2425 for (i = inlen; --i >= 0; ) {
2426 c = Py_CHARMASK(*input++);
2427 if (trans_table[c] != -1)
2428 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2429 continue;
2430 changed = 1;
2431 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002432 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 Py_DECREF(result);
2434 Py_INCREF(input_obj);
2435 return input_obj;
2436 }
2437 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002438 if (inlen > 0)
2439 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440 return result;
2441}
2442
2443
/* Search directions passed as the `direction` argument of findstring()
   and countstring().  REVERSE is parenthesised so the negative constant
   stays a single operand wherever the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first occurrence of byte `c` within the
   `target_len` bytes starting at `target`, or NULL when it is absent.
   Every argument is parenthesised so arbitrary expressions expand
   safely. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002451
Thomas Wouters477c8d52006-05-27 19:21:47 +00002452/* String ops must return a string. */
2453/* If the object is subclass of string, create a copy */
2454Py_LOCAL(PyStringObject *)
2455return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002456{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002457 if (PyString_CheckExact(self)) {
2458 Py_INCREF(self);
2459 return self;
2460 }
2461 return (PyStringObject *)PyString_FromStringAndSize(
2462 PyString_AS_STRING(self),
2463 PyString_GET_SIZE(self));
2464}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002465
Thomas Wouters477c8d52006-05-27 19:21:47 +00002466Py_LOCAL_INLINE(Py_ssize_t)
2467countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
2468{
2469 Py_ssize_t count=0;
2470 char *start=target;
2471 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472
Thomas Wouters477c8d52006-05-27 19:21:47 +00002473 while ( (start=findchar(start, end-start, c)) != NULL ) {
2474 count++;
2475 if (count >= maxcount)
2476 break;
2477 start += 1;
2478 }
2479 return count;
2480}
2481
2482Py_LOCAL(Py_ssize_t)
2483findstring(char *target, Py_ssize_t target_len,
2484 char *pattern, Py_ssize_t pattern_len,
2485 Py_ssize_t start,
2486 Py_ssize_t end,
2487 int direction)
2488{
2489 if (start < 0) {
2490 start += target_len;
2491 if (start < 0)
2492 start = 0;
2493 }
2494 if (end > target_len) {
2495 end = target_len;
2496 } else if (end < 0) {
2497 end += target_len;
2498 if (end < 0)
2499 end = 0;
2500 }
2501
2502 /* zero-length substrings always match at the first attempt */
2503 if (pattern_len == 0)
2504 return (direction > 0) ? start : end;
2505
2506 end -= pattern_len;
2507
2508 if (direction < 0) {
2509 for (; end >= start; end--)
2510 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2511 return end;
2512 } else {
2513 for (; start <= end; start++)
2514 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2515 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002516 }
2517 return -1;
2518}
2519
Thomas Wouters477c8d52006-05-27 19:21:47 +00002520Py_LOCAL_INLINE(Py_ssize_t)
2521countstring(char *target, Py_ssize_t target_len,
2522 char *pattern, Py_ssize_t pattern_len,
2523 Py_ssize_t start,
2524 Py_ssize_t end,
2525 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002527 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002528
Thomas Wouters477c8d52006-05-27 19:21:47 +00002529 if (start < 0) {
2530 start += target_len;
2531 if (start < 0)
2532 start = 0;
2533 }
2534 if (end > target_len) {
2535 end = target_len;
2536 } else if (end < 0) {
2537 end += target_len;
2538 if (end < 0)
2539 end = 0;
2540 }
2541
2542 /* zero-length substrings match everywhere */
2543 if (pattern_len == 0 || maxcount == 0) {
2544 if (target_len+1 < maxcount)
2545 return target_len+1;
2546 return maxcount;
2547 }
2548
2549 end -= pattern_len;
2550 if (direction < 0) {
2551 for (; (end >= start); end--)
2552 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2553 count++;
2554 if (--maxcount <= 0) break;
2555 end -= pattern_len-1;
2556 }
2557 } else {
2558 for (; (start <= end); start++)
2559 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2560 count++;
2561 if (--maxcount <= 0)
2562 break;
2563 start += pattern_len-1;
2564 }
2565 }
2566 return count;
2567}
2568
2569
2570/* Algorithms for different cases of string replacement */
2571
2572/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2573Py_LOCAL(PyStringObject *)
2574replace_interleave(PyStringObject *self,
2575 PyStringObject *to,
2576 Py_ssize_t maxcount)
2577{
2578 char *self_s, *to_s, *result_s;
2579 Py_ssize_t self_len, to_len, result_len;
2580 Py_ssize_t count, i, product;
2581 PyStringObject *result;
2582
2583 self_len = PyString_GET_SIZE(self);
2584 to_len = PyString_GET_SIZE(to);
2585
2586 /* 1 at the end plus 1 after every character */
2587 count = self_len+1;
2588 if (maxcount < count)
2589 count = maxcount;
2590
2591 /* Check for overflow */
2592 /* result_len = count * to_len + self_len; */
2593 product = count * to_len;
2594 if (product / to_len != count) {
2595 PyErr_SetString(PyExc_OverflowError,
2596 "replace string is too long");
2597 return NULL;
2598 }
2599 result_len = product + self_len;
2600 if (result_len < 0) {
2601 PyErr_SetString(PyExc_OverflowError,
2602 "replace string is too long");
2603 return NULL;
2604 }
2605
2606 if (! (result = (PyStringObject *)
2607 PyString_FromStringAndSize(NULL, result_len)) )
2608 return NULL;
2609
2610 self_s = PyString_AS_STRING(self);
2611 to_s = PyString_AS_STRING(to);
2612 to_len = PyString_GET_SIZE(to);
2613 result_s = PyString_AS_STRING(result);
2614
2615 /* TODO: special case single character, which doesn't need memcpy */
2616
2617 /* Lay the first one down (guaranteed this will occur) */
2618 memcpy(result_s, to_s, to_len);
2619 result_s += to_len;
2620 count -= 1;
2621
2622 for (i=0; i<count; i++) {
2623 *result_s++ = *self_s++;
2624 memcpy(result_s, to_s, to_len);
2625 result_s += to_len;
2626 }
2627
2628 /* Copy the rest of the original string */
2629 memcpy(result_s, self_s, self_len-i);
2630
2631 return result;
2632}
2633
2634/* Special case for deleting a single character */
2635/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2636Py_LOCAL(PyStringObject *)
2637replace_delete_single_character(PyStringObject *self,
2638 char from_c, Py_ssize_t maxcount)
2639{
2640 char *self_s, *result_s;
2641 char *start, *next, *end;
2642 Py_ssize_t self_len, result_len;
2643 Py_ssize_t count;
2644 PyStringObject *result;
2645
2646 self_len = PyString_GET_SIZE(self);
2647 self_s = PyString_AS_STRING(self);
2648
2649 count = countchar(self_s, self_len, from_c, maxcount);
2650 if (count == 0) {
2651 return return_self(self);
2652 }
2653
2654 result_len = self_len - count; /* from_len == 1 */
2655 assert(result_len>=0);
2656
2657 if ( (result = (PyStringObject *)
2658 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2659 return NULL;
2660 result_s = PyString_AS_STRING(result);
2661
2662 start = self_s;
2663 end = self_s + self_len;
2664 while (count-- > 0) {
2665 next = findchar(start, end-start, from_c);
2666 if (next == NULL)
2667 break;
2668 memcpy(result_s, start, next-start);
2669 result_s += (next-start);
2670 start = next+1;
2671 }
2672 memcpy(result_s, start, end-start);
2673
2674 return result;
2675}
2676
2677/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2678
2679Py_LOCAL(PyStringObject *)
2680replace_delete_substring(PyStringObject *self, PyStringObject *from,
2681 Py_ssize_t maxcount) {
2682 char *self_s, *from_s, *result_s;
2683 char *start, *next, *end;
2684 Py_ssize_t self_len, from_len, result_len;
2685 Py_ssize_t count, offset;
2686 PyStringObject *result;
2687
2688 self_len = PyString_GET_SIZE(self);
2689 self_s = PyString_AS_STRING(self);
2690 from_len = PyString_GET_SIZE(from);
2691 from_s = PyString_AS_STRING(from);
2692
2693 count = countstring(self_s, self_len,
2694 from_s, from_len,
2695 0, self_len, 1,
2696 maxcount);
2697
2698 if (count == 0) {
2699 /* no matches */
2700 return return_self(self);
2701 }
2702
2703 result_len = self_len - (count * from_len);
2704 assert (result_len>=0);
2705
2706 if ( (result = (PyStringObject *)
2707 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2708 return NULL;
2709
2710 result_s = PyString_AS_STRING(result);
2711
2712 start = self_s;
2713 end = self_s + self_len;
2714 while (count-- > 0) {
2715 offset = findstring(start, end-start,
2716 from_s, from_len,
2717 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002718 if (offset == -1)
2719 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002720 next = start + offset;
2721
2722 memcpy(result_s, start, next-start);
2723
2724 result_s += (next-start);
2725 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002726 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002727 memcpy(result_s, start, end-start);
2728 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002729}
2730
Thomas Wouters477c8d52006-05-27 19:21:47 +00002731/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2732Py_LOCAL(PyStringObject *)
2733replace_single_character_in_place(PyStringObject *self,
2734 char from_c, char to_c,
2735 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002736{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002737 char *self_s, *result_s, *start, *end, *next;
2738 Py_ssize_t self_len;
2739 PyStringObject *result;
2740
2741 /* The result string will be the same size */
2742 self_s = PyString_AS_STRING(self);
2743 self_len = PyString_GET_SIZE(self);
2744
2745 next = findchar(self_s, self_len, from_c);
2746
2747 if (next == NULL) {
2748 /* No matches; return the original string */
2749 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002750 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002751
2752 /* Need to make a new string */
2753 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2754 if (result == NULL)
2755 return NULL;
2756 result_s = PyString_AS_STRING(result);
2757 memcpy(result_s, self_s, self_len);
2758
2759 /* change everything in-place, starting with this one */
2760 start = result_s + (next-self_s);
2761 *start = to_c;
2762 start++;
2763 end = result_s + self_len;
2764
2765 while (--maxcount > 0) {
2766 next = findchar(start, end-start, from_c);
2767 if (next == NULL)
2768 break;
2769 *next = to_c;
2770 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002771 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002772
2773 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002774}
2775
Thomas Wouters477c8d52006-05-27 19:21:47 +00002776/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2777Py_LOCAL(PyStringObject *)
2778replace_substring_in_place(PyStringObject *self,
2779 PyStringObject *from,
2780 PyStringObject *to,
2781 Py_ssize_t maxcount)
2782{
2783 char *result_s, *start, *end;
2784 char *self_s, *from_s, *to_s;
2785 Py_ssize_t self_len, from_len, offset;
2786 PyStringObject *result;
2787
2788 /* The result string will be the same size */
2789
2790 self_s = PyString_AS_STRING(self);
2791 self_len = PyString_GET_SIZE(self);
2792
2793 from_s = PyString_AS_STRING(from);
2794 from_len = PyString_GET_SIZE(from);
2795 to_s = PyString_AS_STRING(to);
2796
2797 offset = findstring(self_s, self_len,
2798 from_s, from_len,
2799 0, self_len, FORWARD);
2800
2801 if (offset == -1) {
2802 /* No matches; return the original string */
2803 return return_self(self);
2804 }
2805
2806 /* Need to make a new string */
2807 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2808 if (result == NULL)
2809 return NULL;
2810 result_s = PyString_AS_STRING(result);
2811 memcpy(result_s, self_s, self_len);
2812
2813
2814 /* change everything in-place, starting with this one */
2815 start = result_s + offset;
2816 memcpy(start, to_s, from_len);
2817 start += from_len;
2818 end = result_s + self_len;
2819
2820 while ( --maxcount > 0) {
2821 offset = findstring(start, end-start,
2822 from_s, from_len,
2823 0, end-start, FORWARD);
2824 if (offset==-1)
2825 break;
2826 memcpy(start+offset, to_s, from_len);
2827 start += offset+from_len;
2828 }
2829
2830 return result;
2831}
2832
2833/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2834Py_LOCAL(PyStringObject *)
2835replace_single_character(PyStringObject *self,
2836 char from_c,
2837 PyStringObject *to,
2838 Py_ssize_t maxcount)
2839{
2840 char *self_s, *to_s, *result_s;
2841 char *start, *next, *end;
2842 Py_ssize_t self_len, to_len, result_len;
2843 Py_ssize_t count, product;
2844 PyStringObject *result;
2845
2846 self_s = PyString_AS_STRING(self);
2847 self_len = PyString_GET_SIZE(self);
2848
2849 count = countchar(self_s, self_len, from_c, maxcount);
2850
2851 if (count == 0) {
2852 /* no matches, return unchanged */
2853 return return_self(self);
2854 }
2855
2856 to_s = PyString_AS_STRING(to);
2857 to_len = PyString_GET_SIZE(to);
2858
2859 /* use the difference between current and new, hence the "-1" */
2860 /* result_len = self_len + count * (to_len-1) */
2861 product = count * (to_len-1);
2862 if (product / (to_len-1) != count) {
2863 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2864 return NULL;
2865 }
2866 result_len = self_len + product;
2867 if (result_len < 0) {
2868 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2869 return NULL;
2870 }
2871
2872 if ( (result = (PyStringObject *)
2873 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2874 return NULL;
2875 result_s = PyString_AS_STRING(result);
2876
2877 start = self_s;
2878 end = self_s + self_len;
2879 while (count-- > 0) {
2880 next = findchar(start, end-start, from_c);
2881 if (next == NULL)
2882 break;
2883
2884 if (next == start) {
2885 /* replace with the 'to' */
2886 memcpy(result_s, to_s, to_len);
2887 result_s += to_len;
2888 start += 1;
2889 } else {
2890 /* copy the unchanged old then the 'to' */
2891 memcpy(result_s, start, next-start);
2892 result_s += (next-start);
2893 memcpy(result_s, to_s, to_len);
2894 result_s += to_len;
2895 start = next+1;
2896 }
2897 }
2898 /* Copy the remainder of the remaining string */
2899 memcpy(result_s, start, end-start);
2900
2901 return result;
2902}
2903
2904/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2905Py_LOCAL(PyStringObject *)
2906replace_substring(PyStringObject *self,
2907 PyStringObject *from,
2908 PyStringObject *to,
2909 Py_ssize_t maxcount) {
2910 char *self_s, *from_s, *to_s, *result_s;
2911 char *start, *next, *end;
2912 Py_ssize_t self_len, from_len, to_len, result_len;
2913 Py_ssize_t count, offset, product;
2914 PyStringObject *result;
2915
2916 self_s = PyString_AS_STRING(self);
2917 self_len = PyString_GET_SIZE(self);
2918 from_s = PyString_AS_STRING(from);
2919 from_len = PyString_GET_SIZE(from);
2920
2921 count = countstring(self_s, self_len,
2922 from_s, from_len,
2923 0, self_len, FORWARD, maxcount);
2924 if (count == 0) {
2925 /* no matches, return unchanged */
2926 return return_self(self);
2927 }
2928
2929 to_s = PyString_AS_STRING(to);
2930 to_len = PyString_GET_SIZE(to);
2931
2932 /* Check for overflow */
2933 /* result_len = self_len + count * (to_len-from_len) */
2934 product = count * (to_len-from_len);
2935 if (product / (to_len-from_len) != count) {
2936 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2937 return NULL;
2938 }
2939 result_len = self_len + product;
2940 if (result_len < 0) {
2941 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942 return NULL;
2943 }
2944
2945 if ( (result = (PyStringObject *)
2946 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2947 return NULL;
2948 result_s = PyString_AS_STRING(result);
2949
2950 start = self_s;
2951 end = self_s + self_len;
2952 while (count-- > 0) {
2953 offset = findstring(start, end-start,
2954 from_s, from_len,
2955 0, end-start, FORWARD);
2956 if (offset == -1)
2957 break;
2958 next = start+offset;
2959 if (next == start) {
2960 /* replace with the 'to' */
2961 memcpy(result_s, to_s, to_len);
2962 result_s += to_len;
2963 start += from_len;
2964 } else {
2965 /* copy the unchanged old then the 'to' */
2966 memcpy(result_s, start, next-start);
2967 result_s += (next-start);
2968 memcpy(result_s, to_s, to_len);
2969 result_s += to_len;
2970 start = next+from_len;
2971 }
2972 }
2973 /* Copy the remainder of the remaining string */
2974 memcpy(result_s, start, end-start);
2975
2976 return result;
2977}
2978
2979
2980Py_LOCAL(PyStringObject *)
2981replace(PyStringObject *self,
2982 PyStringObject *from,
2983 PyStringObject *to,
2984 Py_ssize_t maxcount)
2985{
2986 Py_ssize_t from_len, to_len;
2987
2988 if (maxcount < 0) {
2989 maxcount = PY_SSIZE_T_MAX;
2990 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2991 /* nothing to do; return the original string */
2992 return return_self(self);
2993 }
2994
2995 from_len = PyString_GET_SIZE(from);
2996 to_len = PyString_GET_SIZE(to);
2997
2998 if (maxcount == 0 ||
2999 (from_len == 0 && to_len == 0)) {
3000 /* nothing to do; return the original string */
3001 return return_self(self);
3002 }
3003
3004 /* Handle zero-length special cases */
3005
3006 if (from_len == 0) {
3007 /* insert the 'to' string everywhere. */
3008 /* >>> "Python".replace("", ".") */
3009 /* '.P.y.t.h.o.n.' */
3010 return replace_interleave(self, to, maxcount);
3011 }
3012
3013 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3014 /* point for an empty self string to generate a non-empty string */
3015 /* Special case so the remaining code always gets a non-empty string */
3016 if (PyString_GET_SIZE(self) == 0) {
3017 return return_self(self);
3018 }
3019
3020 if (to_len == 0) {
3021 /* delete all occurances of 'from' string */
3022 if (from_len == 1) {
3023 return replace_delete_single_character(
3024 self, PyString_AS_STRING(from)[0], maxcount);
3025 } else {
3026 return replace_delete_substring(self, from, maxcount);
3027 }
3028 }
3029
3030 /* Handle special case where both strings have the same length */
3031
3032 if (from_len == to_len) {
3033 if (from_len == 1) {
3034 return replace_single_character_in_place(
3035 self,
3036 PyString_AS_STRING(from)[0],
3037 PyString_AS_STRING(to)[0],
3038 maxcount);
3039 } else {
3040 return replace_substring_in_place(
3041 self, from, to, maxcount);
3042 }
3043 }
3044
3045 /* Otherwise use the more generic algorithms */
3046 if (from_len == 1) {
3047 return replace_single_character(self, PyString_AS_STRING(from)[0],
3048 to, maxcount);
3049 } else {
3050 /* len('from')>=2, len('to')>=1 */
3051 return replace_substring(self, from, to, maxcount);
3052 }
3053}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003055PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003056"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003057\n\
3058Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003059old replaced by new. If the optional argument count is\n\
3060given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003061
3062static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003063string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003064{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003065 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003066 PyObject *from, *to;
3067 const char *tmp_s;
3068 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003069
Thomas Wouters477c8d52006-05-27 19:21:47 +00003070 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003071 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003072
Thomas Wouters477c8d52006-05-27 19:21:47 +00003073 if (PyString_Check(from)) {
3074 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003076#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003077 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003078 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003079 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003080#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00003081 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 return NULL;
3083
Thomas Wouters477c8d52006-05-27 19:21:47 +00003084 if (PyString_Check(to)) {
3085 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003087#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003088 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003089 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003090 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003091#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00003092 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003093 return NULL;
3094
Thomas Wouters477c8d52006-05-27 19:21:47 +00003095 return (PyObject *)replace((PyStringObject *) self,
3096 (PyStringObject *) from,
3097 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003098}
3099
Thomas Wouters477c8d52006-05-27 19:21:47 +00003100/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003101
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003102PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003103"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003104\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003105Return True if S starts with the specified prefix, False otherwise.\n\
3106With optional start, test S beginning at that position.\n\
3107With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003108
3109static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003110string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003111{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003112 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003113 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003115 Py_ssize_t plen;
3116 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003117 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003119
Guido van Rossumc6821402000-05-08 14:08:05 +00003120 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3121 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003122 return NULL;
3123 if (PyString_Check(subobj)) {
3124 prefix = PyString_AS_STRING(subobj);
3125 plen = PyString_GET_SIZE(subobj);
3126 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003127#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003128 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003129 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003130 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003131 subobj, start, end, -1);
3132 if (rc == -1)
3133 return NULL;
3134 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003135 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003136 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003137#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003138 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139 return NULL;
3140
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003141 string_adjust_indices(&start, &end, len);
3142
3143 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003144 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003145
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003146 if (end-start >= plen)
3147 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3148 else
3149 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150}
3151
3152
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003153PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003154"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003155\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003156Return True if S ends with the specified suffix, False otherwise.\n\
3157With optional start, test S beginning at that position.\n\
3158With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003159
3160static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003161string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003162{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003164 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003165 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003166 Py_ssize_t slen;
3167 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003168 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003170
Guido van Rossumc6821402000-05-08 14:08:05 +00003171 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3172 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003173 return NULL;
3174 if (PyString_Check(subobj)) {
3175 suffix = PyString_AS_STRING(subobj);
3176 slen = PyString_GET_SIZE(subobj);
3177 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003178#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003179 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003180 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003181 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003182 subobj, start, end, +1);
3183 if (rc == -1)
3184 return NULL;
3185 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003186 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003187 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003188#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003189 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003190 return NULL;
3191
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003192 string_adjust_indices(&start, &end, len);
3193
3194 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003195 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003196
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003197 if (end-slen > start)
3198 start = end - slen;
3199 if (end-start >= slen)
3200 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3201 else
3202 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003203}
3204
3205
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003206PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003207"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003208\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003209Encodes S using the codec registered for encoding. encoding defaults\n\
3210to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003211handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003212a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3213'xmlcharrefreplace' as well as any other name registered with\n\
3214codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003215
3216static PyObject *
3217string_encode(PyStringObject *self, PyObject *args)
3218{
3219 char *encoding = NULL;
3220 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003221 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003222
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003223 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3224 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003225 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003226 if (v == NULL)
3227 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003228 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3229 PyErr_Format(PyExc_TypeError,
3230 "encoder did not return a string/unicode object "
3231 "(type=%.400s)",
3232 v->ob_type->tp_name);
3233 Py_DECREF(v);
3234 return NULL;
3235 }
3236 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003237
3238 onError:
3239 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003240}
3241
3242
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003243PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003244"S.decode([encoding[,errors]]) -> object\n\
3245\n\
3246Decodes S using the codec registered for encoding. encoding defaults\n\
3247to the default encoding. errors may be given to set a different error\n\
3248handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003249a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3250as well as any other name registerd with codecs.register_error that is\n\
3251able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003252
3253static PyObject *
3254string_decode(PyStringObject *self, PyObject *args)
3255{
3256 char *encoding = NULL;
3257 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003258 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003259
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003260 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3261 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003262 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003263 if (v == NULL)
3264 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003265 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3266 PyErr_Format(PyExc_TypeError,
3267 "decoder did not return a string/unicode object "
3268 "(type=%.400s)",
3269 v->ob_type->tp_name);
3270 Py_DECREF(v);
3271 return NULL;
3272 }
3273 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003274
3275 onError:
3276 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003277}
3278
3279
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003280PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003281"S.expandtabs([tabsize]) -> string\n\
3282\n\
3283Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003284If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003285
3286static PyObject*
3287string_expandtabs(PyStringObject *self, PyObject *args)
3288{
3289 const char *e, *p;
3290 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003291 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003292 PyObject *u;
3293 int tabsize = 8;
3294
3295 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3296 return NULL;
3297
Thomas Wouters7e474022000-07-16 12:04:32 +00003298 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003299 i = j = 0;
3300 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3301 for (p = PyString_AS_STRING(self); p < e; p++)
3302 if (*p == '\t') {
3303 if (tabsize > 0)
3304 j += tabsize - (j % tabsize);
3305 }
3306 else {
3307 j++;
3308 if (*p == '\n' || *p == '\r') {
3309 i += j;
3310 j = 0;
3311 }
3312 }
3313
3314 /* Second pass: create output string and fill it */
3315 u = PyString_FromStringAndSize(NULL, i + j);
3316 if (!u)
3317 return NULL;
3318
3319 j = 0;
3320 q = PyString_AS_STRING(u);
3321
3322 for (p = PyString_AS_STRING(self); p < e; p++)
3323 if (*p == '\t') {
3324 if (tabsize > 0) {
3325 i = tabsize - (j % tabsize);
3326 j += i;
3327 while (i--)
3328 *q++ = ' ';
3329 }
3330 }
3331 else {
3332 j++;
3333 *q++ = *p;
3334 if (*p == '\n' || *p == '\r')
3335 j = 0;
3336 }
3337
3338 return u;
3339}
3340
Thomas Wouters477c8d52006-05-27 19:21:47 +00003341Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003342pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003343{
3344 PyObject *u;
3345
3346 if (left < 0)
3347 left = 0;
3348 if (right < 0)
3349 right = 0;
3350
Tim Peters8fa5dd02001-09-12 02:18:30 +00003351 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003352 Py_INCREF(self);
3353 return (PyObject *)self;
3354 }
3355
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003356 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003357 left + PyString_GET_SIZE(self) + right);
3358 if (u) {
3359 if (left)
3360 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003361 memcpy(PyString_AS_STRING(u) + left,
3362 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003363 PyString_GET_SIZE(self));
3364 if (right)
3365 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3366 fill, right);
3367 }
3368
3369 return u;
3370}
3371
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003372PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003373"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003374"\n"
3375"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003376"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003377
3378static PyObject *
3379string_ljust(PyStringObject *self, PyObject *args)
3380{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003381 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003382 char fillchar = ' ';
3383
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003384 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003385 return NULL;
3386
Tim Peters8fa5dd02001-09-12 02:18:30 +00003387 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003388 Py_INCREF(self);
3389 return (PyObject*) self;
3390 }
3391
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003392 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003393}
3394
3395
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003396PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003397"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003398"\n"
3399"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003400"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003401
3402static PyObject *
3403string_rjust(PyStringObject *self, PyObject *args)
3404{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003405 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003406 char fillchar = ' ';
3407
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003408 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003409 return NULL;
3410
Tim Peters8fa5dd02001-09-12 02:18:30 +00003411 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412 Py_INCREF(self);
3413 return (PyObject*) self;
3414 }
3415
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003416 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003417}
3418
3419
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003420PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003421"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003422"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003423"Return S centered in a string of length width. Padding is\n"
3424"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425
3426static PyObject *
3427string_center(PyStringObject *self, PyObject *args)
3428{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003429 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003430 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003431 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003432
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003433 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003434 return NULL;
3435
Tim Peters8fa5dd02001-09-12 02:18:30 +00003436 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003437 Py_INCREF(self);
3438 return (PyObject*) self;
3439 }
3440
3441 marg = width - PyString_GET_SIZE(self);
3442 left = marg / 2 + (marg & width & 1);
3443
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003444 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445}
3446
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003447PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003448"S.zfill(width) -> string\n"
3449"\n"
3450"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003451"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003452
3453static PyObject *
3454string_zfill(PyStringObject *self, PyObject *args)
3455{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003456 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003457 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003458 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003459 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003460
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003461 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003462 return NULL;
3463
3464 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003465 if (PyString_CheckExact(self)) {
3466 Py_INCREF(self);
3467 return (PyObject*) self;
3468 }
3469 else
3470 return PyString_FromStringAndSize(
3471 PyString_AS_STRING(self),
3472 PyString_GET_SIZE(self)
3473 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003474 }
3475
3476 fill = width - PyString_GET_SIZE(self);
3477
3478 s = pad(self, fill, 0, '0');
3479
3480 if (s == NULL)
3481 return NULL;
3482
3483 p = PyString_AS_STRING(s);
3484 if (p[fill] == '+' || p[fill] == '-') {
3485 /* move sign to beginning of string */
3486 p[0] = p[fill];
3487 p[fill] = '0';
3488 }
3489
3490 return (PyObject*) s;
3491}
3492
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003493PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003494"S.isspace() -> bool\n\
3495\n\
3496Return True if all characters in S are whitespace\n\
3497and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003498
3499static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003500string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501{
Fred Drakeba096332000-07-09 07:04:36 +00003502 register const unsigned char *p
3503 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003504 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505
Guido van Rossum4c08d552000-03-10 22:55:18 +00003506 /* Shortcut for single character strings */
3507 if (PyString_GET_SIZE(self) == 1 &&
3508 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003509 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003510
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003511 /* Special case for empty strings */
3512 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003513 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003514
Guido van Rossum4c08d552000-03-10 22:55:18 +00003515 e = p + PyString_GET_SIZE(self);
3516 for (; p < e; p++) {
3517 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003518 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003519 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003520 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003521}
3522
3523
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003524PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003525"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003526\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003527Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003528and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003529
3530static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003531string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003532{
Fred Drakeba096332000-07-09 07:04:36 +00003533 register const unsigned char *p
3534 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003535 register const unsigned char *e;
3536
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003537 /* Shortcut for single character strings */
3538 if (PyString_GET_SIZE(self) == 1 &&
3539 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003540 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003541
3542 /* Special case for empty strings */
3543 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003544 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003545
3546 e = p + PyString_GET_SIZE(self);
3547 for (; p < e; p++) {
3548 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003551 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003552}
3553
3554
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003555PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003556"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003557\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003558Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003559and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003560
3561static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003562string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003563{
Fred Drakeba096332000-07-09 07:04:36 +00003564 register const unsigned char *p
3565 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003566 register const unsigned char *e;
3567
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003568 /* Shortcut for single character strings */
3569 if (PyString_GET_SIZE(self) == 1 &&
3570 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003571 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003572
3573 /* Special case for empty strings */
3574 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003575 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576
3577 e = p + PyString_GET_SIZE(self);
3578 for (; p < e; p++) {
3579 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003583}
3584
3585
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003586PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003587"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003588\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003589Return True if all characters in S are digits\n\
3590and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003591
3592static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003593string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003594{
Fred Drakeba096332000-07-09 07:04:36 +00003595 register const unsigned char *p
3596 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003597 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598
Guido van Rossum4c08d552000-03-10 22:55:18 +00003599 /* Shortcut for single character strings */
3600 if (PyString_GET_SIZE(self) == 1 &&
3601 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003602 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003603
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003604 /* Special case for empty strings */
3605 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003606 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003607
Guido van Rossum4c08d552000-03-10 22:55:18 +00003608 e = p + PyString_GET_SIZE(self);
3609 for (; p < e; p++) {
3610 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003614}
3615
3616
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003617PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003620Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003621at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622
3623static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003624string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625{
Fred Drakeba096332000-07-09 07:04:36 +00003626 register const unsigned char *p
3627 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003628 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629 int cased;
3630
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631 /* Shortcut for single character strings */
3632 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003635 /* Special case for empty strings */
3636 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003638
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639 e = p + PyString_GET_SIZE(self);
3640 cased = 0;
3641 for (; p < e; p++) {
3642 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644 else if (!cased && islower(*p))
3645 cased = 1;
3646 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648}
3649
3650
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003651PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003652"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003654Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003655at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656
3657static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003658string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659{
Fred Drakeba096332000-07-09 07:04:36 +00003660 register const unsigned char *p
3661 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003662 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663 int cased;
3664
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665 /* Shortcut for single character strings */
3666 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003667 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003669 /* Special case for empty strings */
3670 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003671 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003672
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673 e = p + PyString_GET_SIZE(self);
3674 cased = 0;
3675 for (; p < e; p++) {
3676 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678 else if (!cased && isupper(*p))
3679 cased = 1;
3680 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682}
3683
3684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003685PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003686"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003688Return True if S is a titlecased string and there is at least one\n\
3689character in S, i.e. uppercase characters may only follow uncased\n\
3690characters and lowercase characters only cased ones. Return False\n\
3691otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692
3693static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003694string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695{
Fred Drakeba096332000-07-09 07:04:36 +00003696 register const unsigned char *p
3697 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003698 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 int cased, previous_is_cased;
3700
Guido van Rossum4c08d552000-03-10 22:55:18 +00003701 /* Shortcut for single character strings */
3702 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003705 /* Special case for empty strings */
3706 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003707 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003708
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709 e = p + PyString_GET_SIZE(self);
3710 cased = 0;
3711 previous_is_cased = 0;
3712 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003713 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714
3715 if (isupper(ch)) {
3716 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003717 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003718 previous_is_cased = 1;
3719 cased = 1;
3720 }
3721 else if (islower(ch)) {
3722 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724 previous_is_cased = 1;
3725 cased = 1;
3726 }
3727 else
3728 previous_is_cased = 0;
3729 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003730 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731}
3732
3733
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003734PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003735"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736\n\
3737Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003738Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003739is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741static PyObject*
3742string_splitlines(PyStringObject *self, PyObject *args)
3743{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003744 register Py_ssize_t i;
3745 register Py_ssize_t j;
3746 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003747 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 PyObject *list;
3749 PyObject *str;
3750 char *data;
3751
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003752 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753 return NULL;
3754
3755 data = PyString_AS_STRING(self);
3756 len = PyString_GET_SIZE(self);
3757
Thomas Wouters477c8d52006-05-27 19:21:47 +00003758 /* This does not use the preallocated list because splitlines is
3759 usually run with hundreds of newlines. The overhead of
3760 switching between PyList_SET_ITEM and append causes about a
3761 2-3% slowdown for that common case. A smarter implementation
3762 could move the if check out, so the SET_ITEMs are done first
3763 and the appends only done when the prealloc buffer is full.
3764 That's too much work for little gain.*/
3765
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766 list = PyList_New(0);
3767 if (!list)
3768 goto onError;
3769
3770 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003771 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003772
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773 /* Find a line and append it */
3774 while (i < len && data[i] != '\n' && data[i] != '\r')
3775 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776
3777 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003778 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779 if (i < len) {
3780 if (data[i] == '\r' && i + 1 < len &&
3781 data[i+1] == '\n')
3782 i += 2;
3783 else
3784 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003785 if (keepends)
3786 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003788 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789 j = i;
3790 }
3791 if (j < len) {
3792 SPLIT_APPEND(data, j, len);
3793 }
3794
3795 return list;
3796
3797 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003798 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799 return NULL;
3800}
3801
3802#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003803#undef SPLIT_ADD
3804#undef MAX_PREALLOC
3805#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003806
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003807static PyObject *
3808string_getnewargs(PyStringObject *v)
3809{
3810 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3811}
3812
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003813
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003814static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003815string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816 /* Counterparts of the obsolete stropmodule functions; except
3817 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003818 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3819 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003820 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003821 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3822 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003823 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3824 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3825 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3826 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3827 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3828 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3829 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003830 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3831 capitalize__doc__},
3832 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3833 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3834 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003835 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003836 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3837 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3838 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3839 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3840 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3841 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3842 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003843 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3844 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003845 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3846 startswith__doc__},
3847 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3848 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3849 swapcase__doc__},
3850 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3851 translate__doc__},
3852 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3853 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3854 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3855 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3856 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3857 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3858 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3859 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3860 expandtabs__doc__},
3861 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3862 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003863 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003864 {NULL, NULL} /* sentinel */
3865};
3866
Jeremy Hylton938ace62002-07-17 16:30:39 +00003867static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003868str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3869
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003870static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003871string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003872{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003873 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003874 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003875
Guido van Rossumae960af2001-08-30 03:11:59 +00003876 if (type != &PyString_Type)
3877 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003878 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3879 return NULL;
3880 if (x == NULL)
3881 return PyString_FromString("");
3882 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003883}
3884
Guido van Rossumae960af2001-08-30 03:11:59 +00003885static PyObject *
3886str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3887{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003888 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003889 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003890
3891 assert(PyType_IsSubtype(type, &PyString_Type));
3892 tmp = string_new(&PyString_Type, args, kwds);
3893 if (tmp == NULL)
3894 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003895 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003896 n = PyString_GET_SIZE(tmp);
3897 pnew = type->tp_alloc(type, n);
3898 if (pnew != NULL) {
3899 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003900 ((PyStringObject *)pnew)->ob_shash =
3901 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003902 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003903 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003904 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003905 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003906}
3907
Guido van Rossumcacfc072002-05-24 19:01:59 +00003908static PyObject *
3909basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3910{
3911 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003912 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003913 return NULL;
3914}
3915
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003916static PyObject *
3917string_mod(PyObject *v, PyObject *w)
3918{
3919 if (!PyString_Check(v)) {
3920 Py_INCREF(Py_NotImplemented);
3921 return Py_NotImplemented;
3922 }
3923 return PyString_Format(v, w);
3924}
3925
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003926PyDoc_STRVAR(basestring_doc,
3927"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003928
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003929static PyNumberMethods string_as_number = {
3930 0, /*nb_add*/
3931 0, /*nb_subtract*/
3932 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003933 string_mod, /*nb_remainder*/
3934};
3935
3936
Guido van Rossumcacfc072002-05-24 19:01:59 +00003937PyTypeObject PyBaseString_Type = {
3938 PyObject_HEAD_INIT(&PyType_Type)
3939 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003940 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003941 0,
3942 0,
3943 0, /* tp_dealloc */
3944 0, /* tp_print */
3945 0, /* tp_getattr */
3946 0, /* tp_setattr */
3947 0, /* tp_compare */
3948 0, /* tp_repr */
3949 0, /* tp_as_number */
3950 0, /* tp_as_sequence */
3951 0, /* tp_as_mapping */
3952 0, /* tp_hash */
3953 0, /* tp_call */
3954 0, /* tp_str */
3955 0, /* tp_getattro */
3956 0, /* tp_setattro */
3957 0, /* tp_as_buffer */
3958 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3959 basestring_doc, /* tp_doc */
3960 0, /* tp_traverse */
3961 0, /* tp_clear */
3962 0, /* tp_richcompare */
3963 0, /* tp_weaklistoffset */
3964 0, /* tp_iter */
3965 0, /* tp_iternext */
3966 0, /* tp_methods */
3967 0, /* tp_members */
3968 0, /* tp_getset */
3969 &PyBaseObject_Type, /* tp_base */
3970 0, /* tp_dict */
3971 0, /* tp_descr_get */
3972 0, /* tp_descr_set */
3973 0, /* tp_dictoffset */
3974 0, /* tp_init */
3975 0, /* tp_alloc */
3976 basestring_new, /* tp_new */
3977 0, /* tp_free */
3978};
3979
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003980PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003981"str(object) -> string\n\
3982\n\
3983Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003984If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003985
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003986PyTypeObject PyString_Type = {
3987 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003988 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003989 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003990 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003991 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003992 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003993 (printfunc)string_print, /* tp_print */
3994 0, /* tp_getattr */
3995 0, /* tp_setattr */
3996 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003997 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003998 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003999 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004000 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004001 (hashfunc)string_hash, /* tp_hash */
4002 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004003 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004004 PyObject_GenericGetAttr, /* tp_getattro */
4005 0, /* tp_setattro */
4006 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004007 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004008 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004009 string_doc, /* tp_doc */
4010 0, /* tp_traverse */
4011 0, /* tp_clear */
4012 (richcmpfunc)string_richcompare, /* tp_richcompare */
4013 0, /* tp_weaklistoffset */
4014 0, /* tp_iter */
4015 0, /* tp_iternext */
4016 string_methods, /* tp_methods */
4017 0, /* tp_members */
4018 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004019 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004020 0, /* tp_dict */
4021 0, /* tp_descr_get */
4022 0, /* tp_descr_set */
4023 0, /* tp_dictoffset */
4024 0, /* tp_init */
4025 0, /* tp_alloc */
4026 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004027 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004028};
4029
4030void
Fred Drakeba096332000-07-09 07:04:36 +00004031PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004032{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004033 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004034 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004035 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004036 if (w == NULL || !PyString_Check(*pv)) {
4037 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004038 *pv = NULL;
4039 return;
4040 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004041 v = string_concat((PyStringObject *) *pv, w);
4042 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004043 *pv = v;
4044}
4045
Guido van Rossum013142a1994-08-30 08:19:36 +00004046void
Fred Drakeba096332000-07-09 07:04:36 +00004047PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004048{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004049 PyString_Concat(pv, w);
4050 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004051}
4052
4053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004054/* The following function breaks the notion that strings are immutable:
4055 it changes the size of a string. We get away with this only if there
4056 is only one module referencing the object. You can also think of it
4057 as creating a new string object and destroying the old one, only
4058 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004059 already be known to some other part of the code...
4060 Note that if there's not enough memory to resize the string, the original
4061 string object at *pv is deallocated, *pv is set to NULL, an "out of
4062 memory" exception is set, and -1 is returned. Else (on success) 0 is
4063 returned, and the value in *pv may or may not be the same as on input.
4064 As always, an extra byte is allocated for a trailing \0 byte (newsize
4065 does *not* include that), and a trailing \0 byte is stored.
4066*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004067
4068int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004069_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004070{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 register PyObject *v;
4072 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004073 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004074 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4075 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004076 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004077 Py_DECREF(v);
4078 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004079 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004080 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004081 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004082 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 _Py_ForgetReference(v);
4084 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004085 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004086 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004087 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004088 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004089 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004090 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004091 _Py_NewReference(*pv);
4092 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004093 sv->ob_size = newsize;
4094 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004095 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004096 return 0;
4097}
Guido van Rossume5372401993-03-16 12:15:04 +00004098
4099/* Helpers for formatstring */
4100
Thomas Wouters477c8d52006-05-27 19:21:47 +00004101Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004102getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004103{
Thomas Wouters977485d2006-02-16 15:59:12 +00004104 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004105 if (argidx < arglen) {
4106 (*p_argidx)++;
4107 if (arglen < 0)
4108 return args;
4109 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004110 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004111 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004112 PyErr_SetString(PyExc_TypeError,
4113 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004114 return NULL;
4115}
4116
/* Bit flags for the format codes, combined into one `flags` int:
 *
 *   flag      format character
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4129
Thomas Wouters477c8d52006-05-27 19:21:47 +00004130Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004131formatfloat(char *buf, size_t buflen, int flags,
4132 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004133{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004134 /* fmt = '%#.' + `prec` + `type`
4135 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004136 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004137 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004138 x = PyFloat_AsDouble(v);
4139 if (x == -1.0 && PyErr_Occurred()) {
4140 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004141 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004142 }
Guido van Rossume5372401993-03-16 12:15:04 +00004143 if (prec < 0)
4144 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004145 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4146 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004147 /* Worst case length calc to ensure no buffer overrun:
4148
4149 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004150 fmt = %#.<prec>g
4151 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004152 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004153 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004154
4155 'f' formats:
4156 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4157 len = 1 + 50 + 1 + prec = 52 + prec
4158
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004159 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004160 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004161
4162 */
4163 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4164 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004165 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004166 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004167 return -1;
4168 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004169 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4170 (flags&F_ALT) ? "#" : "",
4171 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004172 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004173 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004174}
4175
Tim Peters38fd5b62000-09-21 05:43:11 +00004176/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4177 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4178 * Python's regular ints.
4179 * Return value: a new PyString*, or NULL if error.
4180 * . *pbuf is set to point into it,
4181 * *plen set to the # of chars following that.
4182 * Caller must decref it when done using pbuf.
4183 * The string starting at *pbuf is of the form
4184 * "-"? ("0x" | "0X")? digit+
4185 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004186 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004187 * There will be at least prec digits, zero-filled on the left if
4188 * necessary to get that many.
4189 * val object to be converted
4190 * flags bitmask of format flags; only F_ALT is looked at
4191 * prec minimum number of digits; 0-fill on left if needed
4192 * type a character in [duoxX]; u acts the same as d
4193 *
4194 * CAUTION: o, x and X conversions on regular ints can never
4195 * produce a '-' sign, but can for Python's unbounded ints.
4196 */
4197PyObject*
4198_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4199 char **pbuf, int *plen)
4200{
4201 PyObject *result = NULL;
4202 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004203 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004204 int sign; /* 1 if '-', else 0 */
4205 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004206 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004207 int numdigits; /* len == numnondigits + numdigits */
4208 int numnondigits = 0;
4209
4210 switch (type) {
4211 case 'd':
4212 case 'u':
4213 result = val->ob_type->tp_str(val);
4214 break;
4215 case 'o':
4216 result = val->ob_type->tp_as_number->nb_oct(val);
4217 break;
4218 case 'x':
4219 case 'X':
4220 numnondigits = 2;
4221 result = val->ob_type->tp_as_number->nb_hex(val);
4222 break;
4223 default:
4224 assert(!"'type' not in [duoxX]");
4225 }
4226 if (!result)
4227 return NULL;
4228
4229 /* To modify the string in-place, there can only be one reference. */
4230 if (result->ob_refcnt != 1) {
4231 PyErr_BadInternalCall();
4232 return NULL;
4233 }
4234 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004235 llen = PyString_Size(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004236 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004237 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4238 return NULL;
4239 }
4240 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004241 if (buf[len-1] == 'L') {
4242 --len;
4243 buf[len] = '\0';
4244 }
4245 sign = buf[0] == '-';
4246 numnondigits += sign;
4247 numdigits = len - numnondigits;
4248 assert(numdigits > 0);
4249
Tim Petersfff53252001-04-12 18:38:48 +00004250 /* Get rid of base marker unless F_ALT */
4251 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004252 /* Need to skip 0x, 0X or 0. */
4253 int skipped = 0;
4254 switch (type) {
4255 case 'o':
4256 assert(buf[sign] == '0');
4257 /* If 0 is only digit, leave it alone. */
4258 if (numdigits > 1) {
4259 skipped = 1;
4260 --numdigits;
4261 }
4262 break;
4263 case 'x':
4264 case 'X':
4265 assert(buf[sign] == '0');
4266 assert(buf[sign + 1] == 'x');
4267 skipped = 2;
4268 numnondigits -= 2;
4269 break;
4270 }
4271 if (skipped) {
4272 buf += skipped;
4273 len -= skipped;
4274 if (sign)
4275 buf[0] = '-';
4276 }
4277 assert(len == numnondigits + numdigits);
4278 assert(numdigits > 0);
4279 }
4280
4281 /* Fill with leading zeroes to meet minimum width. */
4282 if (prec > numdigits) {
4283 PyObject *r1 = PyString_FromStringAndSize(NULL,
4284 numnondigits + prec);
4285 char *b1;
4286 if (!r1) {
4287 Py_DECREF(result);
4288 return NULL;
4289 }
4290 b1 = PyString_AS_STRING(r1);
4291 for (i = 0; i < numnondigits; ++i)
4292 *b1++ = *buf++;
4293 for (i = 0; i < prec - numdigits; i++)
4294 *b1++ = '0';
4295 for (i = 0; i < numdigits; i++)
4296 *b1++ = *buf++;
4297 *b1 = '\0';
4298 Py_DECREF(result);
4299 result = r1;
4300 buf = PyString_AS_STRING(result);
4301 len = numnondigits + prec;
4302 }
4303
4304 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004305 if (type == 'X') {
4306 /* Need to convert all lower case letters to upper case.
4307 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004308 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004309 if (buf[i] >= 'a' && buf[i] <= 'x')
4310 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004311 }
4312 *pbuf = buf;
4313 *plen = len;
4314 return result;
4315}
4316
Thomas Wouters477c8d52006-05-27 19:21:47 +00004317Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004318formatint(char *buf, size_t buflen, int flags,
4319 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004320{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004321 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004322 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4323 + 1 + 1 = 24 */
4324 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004325 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004326 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004327
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004328 x = PyInt_AsLong(v);
4329 if (x == -1 && PyErr_Occurred()) {
4330 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004331 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004332 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004333 if (x < 0 && type == 'u') {
4334 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004335 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004336 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4337 sign = "-";
4338 else
4339 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004340 if (prec < 0)
4341 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004342
4343 if ((flags & F_ALT) &&
4344 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004345 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004346 * of issues that cause pain:
4347 * - when 0 is being converted, the C standard leaves off
4348 * the '0x' or '0X', which is inconsistent with other
4349 * %#x/%#X conversions and inconsistent with Python's
4350 * hex() function
4351 * - there are platforms that violate the standard and
4352 * convert 0 with the '0x' or '0X'
4353 * (Metrowerks, Compaq Tru64)
4354 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004355 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004356 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004357 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004358 * We can achieve the desired consistency by inserting our
4359 * own '0x' or '0X' prefix, and substituting %x/%X in place
4360 * of %#x/%#X.
4361 *
4362 * Note that this is the same approach as used in
4363 * formatint() in unicodeobject.c
4364 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004365 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4366 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004367 }
4368 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004369 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4370 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004371 prec, type);
4372 }
4373
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004374 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4375 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004376 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004377 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004378 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004379 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004380 return -1;
4381 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004382 if (sign[0])
4383 PyOS_snprintf(buf, buflen, fmt, -x);
4384 else
4385 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004386 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004387}
4388
Thomas Wouters477c8d52006-05-27 19:21:47 +00004389Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004390formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004391{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004392 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004393 if (PyString_Check(v)) {
4394 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004395 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004396 }
4397 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004398 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004399 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004400 }
4401 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004402 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004403}
4404
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in any expression (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004414
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004415PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004416PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004417{
4418 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004419 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004420 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004421 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004422 PyObject *result, *orig_args;
4423#ifdef Py_USING_UNICODE
4424 PyObject *v, *w;
4425#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004426 PyObject *dict = NULL;
4427 if (format == NULL || !PyString_Check(format) || args == NULL) {
4428 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004429 return NULL;
4430 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004431 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004432 fmt = PyString_AS_STRING(format);
4433 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004434 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004435 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004436 if (result == NULL)
4437 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004438 res = PyString_AsString(result);
4439 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004440 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004441 argidx = 0;
4442 }
4443 else {
4444 arglen = -1;
4445 argidx = -2;
4446 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004447 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4448 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004449 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004450 while (--fmtcnt >= 0) {
4451 if (*fmt != '%') {
4452 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004453 rescnt = fmtcnt + 100;
4454 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004455 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004456 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004457 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004458 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004459 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004460 }
4461 *res++ = *fmt++;
4462 }
4463 else {
4464 /* Got a format specifier */
4465 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004466 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004467 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004468 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004469 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004470 PyObject *v = NULL;
4471 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004472 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004473 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004474 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004475 char formatbuf[FORMATBUFLEN];
4476 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004477#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004478 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004479 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004480#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004481
Guido van Rossumda9c2711996-12-05 21:58:58 +00004482 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004483 if (*fmt == '(') {
4484 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004485 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004486 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004487 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004488
4489 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004490 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004491 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004492 goto error;
4493 }
4494 ++fmt;
4495 --fmtcnt;
4496 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004497 /* Skip over balanced parentheses */
4498 while (pcount > 0 && --fmtcnt >= 0) {
4499 if (*fmt == ')')
4500 --pcount;
4501 else if (*fmt == '(')
4502 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004503 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004504 }
4505 keylen = fmt - keystart - 1;
4506 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004507 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004508 "incomplete format key");
4509 goto error;
4510 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 key = PyString_FromStringAndSize(keystart,
4512 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004513 if (key == NULL)
4514 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004515 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004516 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004517 args_owned = 0;
4518 }
4519 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004521 if (args == NULL) {
4522 goto error;
4523 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004524 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004525 arglen = -1;
4526 argidx = -2;
4527 }
Guido van Rossume5372401993-03-16 12:15:04 +00004528 while (--fmtcnt >= 0) {
4529 switch (c = *fmt++) {
4530 case '-': flags |= F_LJUST; continue;
4531 case '+': flags |= F_SIGN; continue;
4532 case ' ': flags |= F_BLANK; continue;
4533 case '#': flags |= F_ALT; continue;
4534 case '0': flags |= F_ZERO; continue;
4535 }
4536 break;
4537 }
4538 if (c == '*') {
4539 v = getnextarg(args, arglen, &argidx);
4540 if (v == NULL)
4541 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004542 if (!PyInt_Check(v)) {
4543 PyErr_SetString(PyExc_TypeError,
4544 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004545 goto error;
4546 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004547 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004548 if (width < 0) {
4549 flags |= F_LJUST;
4550 width = -width;
4551 }
Guido van Rossume5372401993-03-16 12:15:04 +00004552 if (--fmtcnt >= 0)
4553 c = *fmt++;
4554 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004555 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004556 width = c - '0';
4557 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004558 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004559 if (!isdigit(c))
4560 break;
4561 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004562 PyErr_SetString(
4563 PyExc_ValueError,
4564 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004565 goto error;
4566 }
4567 width = width*10 + (c - '0');
4568 }
4569 }
4570 if (c == '.') {
4571 prec = 0;
4572 if (--fmtcnt >= 0)
4573 c = *fmt++;
4574 if (c == '*') {
4575 v = getnextarg(args, arglen, &argidx);
4576 if (v == NULL)
4577 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004578 if (!PyInt_Check(v)) {
4579 PyErr_SetString(
4580 PyExc_TypeError,
4581 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004582 goto error;
4583 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004584 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004585 if (prec < 0)
4586 prec = 0;
4587 if (--fmtcnt >= 0)
4588 c = *fmt++;
4589 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004590 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004591 prec = c - '0';
4592 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004593 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004594 if (!isdigit(c))
4595 break;
4596 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004597 PyErr_SetString(
4598 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004599 "prec too big");
4600 goto error;
4601 }
4602 prec = prec*10 + (c - '0');
4603 }
4604 }
4605 } /* prec */
4606 if (fmtcnt >= 0) {
4607 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004608 if (--fmtcnt >= 0)
4609 c = *fmt++;
4610 }
4611 }
4612 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004613 PyErr_SetString(PyExc_ValueError,
4614 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004615 goto error;
4616 }
4617 if (c != '%') {
4618 v = getnextarg(args, arglen, &argidx);
4619 if (v == NULL)
4620 goto error;
4621 }
4622 sign = 0;
4623 fill = ' ';
4624 switch (c) {
4625 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004626 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004627 len = 1;
4628 break;
4629 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004630#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004631 if (PyUnicode_Check(v)) {
4632 fmt = fmt_start;
4633 argidx = argidx_start;
4634 goto unicode;
4635 }
Georg Brandld45014b2005-10-01 17:06:00 +00004636#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004637 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004638#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004639 if (temp != NULL && PyUnicode_Check(temp)) {
4640 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004641 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004642 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004643 goto unicode;
4644 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004645#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004646 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004647 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004648 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004649 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004650 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004651 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004652 if (!PyString_Check(temp)) {
4653 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004654 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004655 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004656 goto error;
4657 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004658 pbuf = PyString_AS_STRING(temp);
4659 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004660 if (prec >= 0 && len > prec)
4661 len = prec;
4662 break;
4663 case 'i':
4664 case 'd':
4665 case 'u':
4666 case 'o':
4667 case 'x':
4668 case 'X':
4669 if (c == 'i')
4670 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004671 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004672 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004673 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004674 prec, c, &pbuf, &ilen);
4675 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004676 if (!temp)
4677 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004678 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004679 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004680 else {
4681 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004682 len = formatint(pbuf,
4683 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004684 flags, prec, c, v);
4685 if (len < 0)
4686 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004687 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004688 }
4689 if (flags & F_ZERO)
4690 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004691 break;
4692 case 'e':
4693 case 'E':
4694 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004695 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004696 case 'g':
4697 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004698 if (c == 'F')
4699 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004700 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004701 len = formatfloat(pbuf, sizeof(formatbuf),
4702 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004703 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004704 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004705 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004706 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004707 fill = '0';
4708 break;
4709 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004710#ifdef Py_USING_UNICODE
4711 if (PyUnicode_Check(v)) {
4712 fmt = fmt_start;
4713 argidx = argidx_start;
4714 goto unicode;
4715 }
4716#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004717 pbuf = formatbuf;
4718 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004719 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004720 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004721 break;
4722 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004723 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004724 "unsupported format character '%c' (0x%x) "
4725 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004726 c, c,
4727 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004728 goto error;
4729 }
4730 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004731 if (*pbuf == '-' || *pbuf == '+') {
4732 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004733 len--;
4734 }
4735 else if (flags & F_SIGN)
4736 sign = '+';
4737 else if (flags & F_BLANK)
4738 sign = ' ';
4739 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004740 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004741 }
4742 if (width < len)
4743 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004744 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004745 reslen -= rescnt;
4746 rescnt = width + fmtcnt + 100;
4747 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004748 if (reslen < 0) {
4749 Py_DECREF(result);
4750 return PyErr_NoMemory();
4751 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004752 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004753 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004754 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004755 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004756 }
4757 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004758 if (fill != ' ')
4759 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004760 rescnt--;
4761 if (width > len)
4762 width--;
4763 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004764 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4765 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004766 assert(pbuf[1] == c);
4767 if (fill != ' ') {
4768 *res++ = *pbuf++;
4769 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004770 }
Tim Petersfff53252001-04-12 18:38:48 +00004771 rescnt -= 2;
4772 width -= 2;
4773 if (width < 0)
4774 width = 0;
4775 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004776 }
4777 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004778 do {
4779 --rescnt;
4780 *res++ = fill;
4781 } while (--width > len);
4782 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004783 if (fill == ' ') {
4784 if (sign)
4785 *res++ = sign;
4786 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004787 (c == 'x' || c == 'X')) {
4788 assert(pbuf[0] == '0');
4789 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004790 *res++ = *pbuf++;
4791 *res++ = *pbuf++;
4792 }
4793 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004794 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004795 res += len;
4796 rescnt -= len;
4797 while (--width >= len) {
4798 --rescnt;
4799 *res++ = ' ';
4800 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004801 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004802 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004803 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004804 goto error;
4805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004806 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004807 } /* '%' */
4808 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004809 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004810 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004811 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004812 goto error;
4813 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004814 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004815 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004816 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004817 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004818 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004819
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004820#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004821 unicode:
4822 if (args_owned) {
4823 Py_DECREF(args);
4824 args_owned = 0;
4825 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004826 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004827 if (PyTuple_Check(orig_args) && argidx > 0) {
4828 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004829 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004830 v = PyTuple_New(n);
4831 if (v == NULL)
4832 goto error;
4833 while (--n >= 0) {
4834 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4835 Py_INCREF(w);
4836 PyTuple_SET_ITEM(v, n, w);
4837 }
4838 args = v;
4839 } else {
4840 Py_INCREF(orig_args);
4841 args = orig_args;
4842 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004843 args_owned = 1;
4844 /* Take what we have of the result and let the Unicode formatting
4845 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004846 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004847 if (_PyString_Resize(&result, rescnt))
4848 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004849 fmtcnt = PyString_GET_SIZE(format) - \
4850 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004851 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4852 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004853 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004854 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004855 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004856 if (v == NULL)
4857 goto error;
4858 /* Paste what we have (result) to what the Unicode formatting
4859 function returned (v) and return the result (or error) */
4860 w = PyUnicode_Concat(result, v);
4861 Py_DECREF(result);
4862 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004863 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004864 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004865#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004866
Guido van Rossume5372401993-03-16 12:15:04 +00004867 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004868 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004869 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004870 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004871 }
Guido van Rossume5372401993-03-16 12:15:04 +00004872 return NULL;
4873}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004874
Guido van Rossum2a61e741997-01-18 07:55:05 +00004875void
Fred Drakeba096332000-07-09 07:04:36 +00004876PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004877{
4878 register PyStringObject *s = (PyStringObject *)(*p);
4879 PyObject *t;
4880 if (s == NULL || !PyString_Check(s))
4881 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004882 /* If it's a string subclass, we don't really know what putting
4883 it in the interned dict might do. */
4884 if (!PyString_CheckExact(s))
4885 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004886 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004887 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004888 if (interned == NULL) {
4889 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004890 if (interned == NULL) {
4891 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004892 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004893 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004894 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004895 t = PyDict_GetItem(interned, (PyObject *)s);
4896 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004897 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004898 Py_DECREF(*p);
4899 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004900 return;
4901 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004902
Armin Rigo79f7ad22004-08-07 19:27:39 +00004903 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004904 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004905 return;
4906 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004907 /* The two references in interned are not counted by refcnt.
4908 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004909 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004910 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004911}
4912
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004913void
4914PyString_InternImmortal(PyObject **p)
4915{
4916 PyString_InternInPlace(p);
4917 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4918 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4919 Py_INCREF(*p);
4920 }
4921}
4922
Guido van Rossum2a61e741997-01-18 07:55:05 +00004923
4924PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004925PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004926{
4927 PyObject *s = PyString_FromString(cp);
4928 if (s == NULL)
4929 return NULL;
4930 PyString_InternInPlace(&s);
4931 return s;
4932}
4933
Guido van Rossum8cf04761997-08-02 02:57:45 +00004934void
Fred Drakeba096332000-07-09 07:04:36 +00004935PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004936{
4937 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004938 for (i = 0; i < UCHAR_MAX + 1; i++) {
4939 Py_XDECREF(characters[i]);
4940 characters[i] = NULL;
4941 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004942 Py_XDECREF(nullstring);
4943 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004944}
Barry Warsawa903ad982001-02-23 16:40:48 +00004945
Barry Warsawa903ad982001-02-23 16:40:48 +00004946void _Py_ReleaseInternedStrings(void)
4947{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004948 PyObject *keys;
4949 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004950 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004951
4952 if (interned == NULL || !PyDict_Check(interned))
4953 return;
4954 keys = PyDict_Keys(interned);
4955 if (keys == NULL || !PyList_Check(keys)) {
4956 PyErr_Clear();
4957 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004958 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004959
4960 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4961 detector, interned strings are not forcibly deallocated; rather, we
4962 give them their stolen references back, and then clear and DECREF
4963 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004964
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004965 fprintf(stderr, "releasing interned strings\n");
4966 n = PyList_GET_SIZE(keys);
4967 for (i = 0; i < n; i++) {
4968 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4969 switch (s->ob_sstate) {
4970 case SSTATE_NOT_INTERNED:
4971 /* XXX Shouldn't happen */
4972 break;
4973 case SSTATE_INTERNED_IMMORTAL:
4974 s->ob_refcnt += 1;
4975 break;
4976 case SSTATE_INTERNED_MORTAL:
4977 s->ob_refcnt += 2;
4978 break;
4979 default:
4980 Py_FatalError("Inconsistent interned string state.");
4981 }
4982 s->ob_sstate = SSTATE_NOT_INTERNED;
4983 }
4984 Py_DECREF(keys);
4985 PyDict_Clear(interned);
4986 Py_DECREF(interned);
4987 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004988}