blob: c4a6a7057048e3387bdedff0dafb525d97a6f927 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Allocation statistics: bumped each time the shared empty string or a
   shared one-character string is handed out instead of a new object. */
int null_strings, one_strings;
#endif

/* Cache of shared one-character strings, indexed by byte value
   (*str & UCHAR_MAX).  Slots start out NULL and are filled in the first
   time a string consisting of that single byte is created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
26
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Tim Peters8931ff12006-05-13 23:28:20 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000275 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694string_getsize(register PyObject *op)
695{
696 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (PyString_AsStringAndSize(op, &s, &len))
699 return -1;
700 return len;
701}
702
703static /*const*/ char *
704string_getbuffer(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return NULL;
710 return s;
711}
712
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000714PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (!PyString_Check(op))
717 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000718 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719}
720
721/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000722PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 if (!PyString_Check(op))
725 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727}
728
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729int
730PyString_AsStringAndSize(register PyObject *obj,
731 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000732 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733{
734 if (s == NULL) {
735 PyErr_BadInternalCall();
736 return -1;
737 }
738
739 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000740#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 if (PyUnicode_Check(obj)) {
742 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
743 if (obj == NULL)
744 return -1;
745 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000746 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000747#endif
748 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_Format(PyExc_TypeError,
750 "expected string or Unicode object, "
751 "%.200s found", obj->ob_type->tp_name);
752 return -1;
753 }
754 }
755
756 *s = PyString_AS_STRING(obj);
757 if (len != NULL)
758 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000759 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_SetString(PyExc_TypeError,
761 "expected string without null bytes");
762 return -1;
763 }
764 return 0;
765}
766
/* -------------------------------------------------------------------- */
/* stringlib components */

/* Switches on inclusion of the stringlib headers below. */
#define USE_FAST

#ifdef USE_FAST

/* Macros defined ahead of the stringlib includes: the character type, the
   constructor and the shared empty string for this string type.
   NOTE(review): presumably consumed by the stringlib headers -- confirm
   against stringlib/. */
#define STRINGLIB_CHAR char
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_EMPTY nullstring

#include "stringlib/fastsearch.h"
#include "stringlib/partition.h"

#endif
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
783/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000784/* Methods */
785
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000786static int
Fred Drakeba096332000-07-09 07:04:36 +0000787string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000789 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000792
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000794 if (! PyString_CheckExact(op)) {
795 int ret;
796 /* A str subclass may have its own __str__ method. */
797 op = (PyStringObject *) PyObject_Str((PyObject *)op);
798 if (op == NULL)
799 return -1;
800 ret = string_print(op, fp, flags);
801 Py_DECREF(op);
802 return ret;
803 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000804 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000805#ifdef __VMS
806 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
807#else
808 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
809#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000810 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000812
Thomas Wouters7e474022000-07-16 12:04:32 +0000813 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000815 if (memchr(op->ob_sval, '\'', op->ob_size) &&
816 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '"';
818
819 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000820 for (i = 0; i < op->ob_size; i++) {
821 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000822 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000823 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000824 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000825 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000826 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000827 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000828 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000829 fprintf(fp, "\\r");
830 else if (c < ' ' || c >= 0x7f)
831 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000832 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000833 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000836 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837}
838
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000839PyObject *
840PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000842 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000843 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000844 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000845 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000846 PyErr_SetString(PyExc_OverflowError,
847 "string is too large to make repr");
848 }
849 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000851 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 }
853 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000854 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 register char c;
856 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000857 int quote;
858
Thomas Wouters7e474022000-07-16 12:04:32 +0000859 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000861 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000862 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000863 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000864 quote = '"';
865
Tim Peters9161c8b2001-12-03 01:55:38 +0000866 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000867 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000868 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000869 /* There's at least enough room for a hex escape
870 and a closing quote. */
871 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000873 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000875 else if (c == '\t')
876 *p++ = '\\', *p++ = 't';
877 else if (c == '\n')
878 *p++ = '\\', *p++ = 'n';
879 else if (c == '\r')
880 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000881 else if (c < ' ' || c >= 0x7f) {
882 /* For performance, we don't want to call
883 PyOS_snprintf here (extra layers of
884 function call). */
885 sprintf(p, "\\x%02x", c & 0xff);
886 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000887 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000888 else
889 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000891 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000892 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000895 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000896 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898}
899
Guido van Rossum189f1df2001-05-01 16:51:53 +0000900static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000901string_repr(PyObject *op)
902{
903 return PyString_Repr(op, 1);
904}
905
906static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000907string_str(PyObject *s)
908{
Tim Petersc9933152001-10-16 20:18:24 +0000909 assert(PyString_Check(s));
910 if (PyString_CheckExact(s)) {
911 Py_INCREF(s);
912 return s;
913 }
914 else {
915 /* Subtype -- return genuine string with the same value. */
916 PyStringObject *t = (PyStringObject *) s;
917 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
918 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000919}
920
Martin v. Löwis18e16552006-02-15 17:27:45 +0000921static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000922string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923{
924 return a->ob_size;
925}
926
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000927static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000928string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929{
Andrew Dalke598710c2006-05-25 18:18:39 +0000930 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 register PyStringObject *op;
932 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000933#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000934 if (PyUnicode_Check(bb))
935 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000936#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000937 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000938 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000939 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 return NULL;
941 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000942#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000944 if ((a->ob_size == 0 || b->ob_size == 0) &&
945 PyString_CheckExact(a) && PyString_CheckExact(b)) {
946 if (a->ob_size == 0) {
947 Py_INCREF(bb);
948 return bb;
949 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950 Py_INCREF(a);
951 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000952 }
953 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000954 if (size < 0) {
955 PyErr_SetString(PyExc_OverflowError,
956 "strings are too large to concat");
957 return NULL;
958 }
959
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000960 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000961 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000962 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000963 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000964 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000965 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000966 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000967 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
968 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000969 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971#undef b
972}
973
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000975string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000976{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000977 register Py_ssize_t i;
978 register Py_ssize_t j;
979 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000981 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000982 if (n < 0)
983 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000984 /* watch out for overflows: the size can overflow int,
985 * and the # of bytes needed can overflow size_t
986 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000988 if (n && size / n != a->ob_size) {
989 PyErr_SetString(PyExc_OverflowError,
990 "repeated string is too long");
991 return NULL;
992 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000993 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 Py_INCREF(a);
995 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 }
Tim Peterse7c05322004-06-27 17:24:49 +0000997 nbytes = (size_t)size;
998 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000999 PyErr_SetString(PyExc_OverflowError,
1000 "repeated string is too long");
1001 return NULL;
1002 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001003 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001004 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001005 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001007 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001008 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001009 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001010 op->ob_sval[size] = '\0';
1011 if (a->ob_size == 1 && n > 0) {
1012 memset(op->ob_sval, a->ob_sval[0] , n);
1013 return (PyObject *) op;
1014 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001015 i = 0;
1016 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001017 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1018 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001019 }
1020 while (i < size) {
1021 j = (i <= size-i) ? i : size-i;
1022 memcpy(op->ob_sval+i, op->ob_sval, j);
1023 i += j;
1024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001025 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001026}
1027
1028/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1029
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001030static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001031string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001032 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001033 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001034{
1035 if (i < 0)
1036 i = 0;
1037 if (j < 0)
1038 j = 0; /* Avoid signed/unsigned bug in next line */
1039 if (j > a->ob_size)
1040 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001041 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1042 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043 Py_INCREF(a);
1044 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045 }
1046 if (j < i)
1047 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001048 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001049}
1050
Guido van Rossum9284a572000-03-07 15:53:43 +00001051static int
Fred Drakeba096332000-07-09 07:04:36 +00001052string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001053{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001054 char *s = PyString_AS_STRING(a);
1055 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001056 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001057#ifdef USE_FAST
1058 Py_ssize_t pos;
1059#else
1060 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001061 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001062 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001063#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001064
1065 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001066#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001067 if (PyUnicode_Check(el))
1068 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001069#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001070 if (!PyString_Check(el)) {
1071 PyErr_SetString(PyExc_TypeError,
1072 "'in <string>' requires string as left operand");
1073 return -1;
1074 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001075 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001076
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001077 if (len_sub == 0)
1078 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001079
1080#ifdef USE_FAST
1081 pos = fastsearch(
1082 s, PyString_GET_SIZE(a),
1083 sub, len_sub, FAST_SEARCH
1084 );
1085 return (pos != -1);
1086#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001087 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001088 substring. When s<last, there is still room for a possible match
1089 and s[0] through s[len_sub-1] will be in bounds.
1090 shortsub is len_sub minus the last character which is checked
1091 separately just before the memcmp(). That check helps prevent
1092 false starts and saves the setup time for memcmp().
1093 */
1094 firstchar = sub[0];
1095 shortsub = len_sub - 1;
1096 lastchar = sub[shortsub];
1097 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1098 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001099 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001100 if (s == NULL)
1101 return 0;
1102 assert(s < last);
1103 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001104 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001105 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001106 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001107#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001108 return 0;
1109}
1110
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001111static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001113{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001114 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001115 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001116 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001117 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001118 return NULL;
1119 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001120 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001121 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001122 if (v == NULL)
1123 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001124 else {
1125#ifdef COUNT_ALLOCS
1126 one_strings++;
1127#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001128 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001129 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001130 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001131}
1132
Martin v. Löwiscd353062001-05-24 16:56:35 +00001133static PyObject*
1134string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001135{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001136 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001137 Py_ssize_t len_a, len_b;
1138 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001139 PyObject *result;
1140
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001143 result = Py_NotImplemented;
1144 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001145 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001146 if (a == b) {
1147 switch (op) {
1148 case Py_EQ:case Py_LE:case Py_GE:
1149 result = Py_True;
1150 goto out;
1151 case Py_NE:case Py_LT:case Py_GT:
1152 result = Py_False;
1153 goto out;
1154 }
1155 }
1156 if (op == Py_EQ) {
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (a->ob_size == b->ob_size
1160 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001161 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001162 a->ob_size) == 0)) {
1163 result = Py_True;
1164 } else {
1165 result = Py_False;
1166 }
1167 goto out;
1168 }
1169 len_a = a->ob_size; len_b = b->ob_size;
1170 min_len = (len_a < len_b) ? len_a : len_b;
1171 if (min_len > 0) {
1172 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1173 if (c==0)
1174 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1175 }else
1176 c = 0;
1177 if (c == 0)
1178 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1179 switch (op) {
1180 case Py_LT: c = c < 0; break;
1181 case Py_LE: c = c <= 0; break;
1182 case Py_EQ: assert(0); break; /* unreachable */
1183 case Py_NE: c = c != 0; break;
1184 case Py_GT: c = c > 0; break;
1185 case Py_GE: c = c >= 0; break;
1186 default:
1187 result = Py_NotImplemented;
1188 goto out;
1189 }
1190 result = c ? Py_True : Py_False;
1191 out:
1192 Py_INCREF(result);
1193 return result;
1194}
1195
1196int
1197_PyString_Eq(PyObject *o1, PyObject *o2)
1198{
1199 PyStringObject *a, *b;
1200 a = (PyStringObject*)o1;
1201 b = (PyStringObject*)o2;
1202 return a->ob_size == b->ob_size
1203 && *a->ob_sval == *b->ob_sval
1204 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001205}
1206
Guido van Rossum9bfef441993-03-29 10:43:31 +00001207static long
Fred Drakeba096332000-07-09 07:04:36 +00001208string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001209{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001210 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001211 register unsigned char *p;
1212 register long x;
1213
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001214 if (a->ob_shash != -1)
1215 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001216 len = a->ob_size;
1217 p = (unsigned char *) a->ob_sval;
1218 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001219 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001220 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001221 x ^= a->ob_size;
1222 if (x == -1)
1223 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001224 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001225 return x;
1226}
1227
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001228#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1229
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230static PyObject*
1231string_subscript(PyStringObject* self, PyObject* item)
1232{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001233 PyNumberMethods *nb = item->ob_type->tp_as_number;
1234 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1235 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001236 if (i == -1 && PyErr_Occurred())
1237 return NULL;
1238 if (i < 0)
1239 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001240 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001241 }
1242 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001243 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 char* source_buf;
1245 char* result_buf;
1246 PyObject* result;
1247
Tim Petersae1d0c92006-03-17 03:29:34 +00001248 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001249 PyString_GET_SIZE(self),
1250 &start, &stop, &step, &slicelength) < 0) {
1251 return NULL;
1252 }
1253
1254 if (slicelength <= 0) {
1255 return PyString_FromStringAndSize("", 0);
1256 }
1257 else {
1258 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001259 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001260 if (result_buf == NULL)
1261 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001262
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 cur += step, i++) {
1265 result_buf[i] = source_buf[cur];
1266 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001267
1268 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001269 slicelength);
1270 PyMem_Free(result_buf);
1271 return result;
1272 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001273 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001274 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001275 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001276 "string indices must be integers");
1277 return NULL;
1278 }
1279}
1280
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281static Py_ssize_t
1282string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001283{
1284 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001285 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001286 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001287 return -1;
1288 }
1289 *ptr = (void *)self->ob_sval;
1290 return self->ob_size;
1291}
1292
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293static Py_ssize_t
1294string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001295{
Guido van Rossum045e6881997-09-08 18:30:11 +00001296 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001297 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001298 return -1;
1299}
1300
Martin v. Löwis18e16552006-02-15 17:27:45 +00001301static Py_ssize_t
1302string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001303{
1304 if ( lenp )
1305 *lenp = self->ob_size;
1306 return 1;
1307}
1308
Martin v. Löwis18e16552006-02-15 17:27:45 +00001309static Py_ssize_t
1310string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001311{
1312 if ( index != 0 ) {
1313 PyErr_SetString(PyExc_SystemError,
1314 "accessing non-existent string segment");
1315 return -1;
1316 }
1317 *ptr = self->ob_sval;
1318 return self->ob_size;
1319}
1320
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001321static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001322 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001323 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 (ssizeargfunc)string_repeat, /*sq_repeat*/
1325 (ssizeargfunc)string_item, /*sq_item*/
1326 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001327 0, /*sq_ass_item*/
1328 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001329 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001330};
1331
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001332static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001333 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001334 (binaryfunc)string_subscript,
1335 0,
1336};
1337
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001338static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001339 (readbufferproc)string_buffer_getreadbuf,
1340 (writebufferproc)string_buffer_getwritebuf,
1341 (segcountproc)string_buffer_getsegcount,
1342 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001343};
1344
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345
1346
/* Selectors for which end(s) of a string a strip operation trims.
   The values double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for selector i: skips the 3-character "|O:" prefix
   of the corresponding format string. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001355
Andrew Dalke525eab32006-05-26 14:00:45 +00001356
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits.
   5 splits gives 6 elements; the result is capped at MAX_PREALLOC. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The helpers below use variables of the expanding function by name:
   `str` (scratch PyObject *), `list` (result list), `count` (number of
   items stored so far, SPLIT_ADD only) and the label `onError`. */

/* Append data[left:right] to `list` with PyList_Append; jumps to
   onError on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as item number `count` of `list` and bump
   `count`.  The first MAX_PREALLOC items are written straight into the
   preallocated slots; later ones are appended.  Jumps to onError on
   failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;

/* Cursor helpers: `i` must be an lvalue and is advanced (or moved back
   by the R* variants) while s[i] is / is not whitespace. */
#define SKIP_SPACE(s, i, len)    { while (i<(len) && isspace(Py_CHARMASK((s)[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<(len) && !isspace(Py_CHARMASK((s)[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK((s)[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK((s)[i]))) i--; }
1406
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001407Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001408split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409{
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001412 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413
1414 if (list == NULL)
1415 return NULL;
1416
Andrew Dalke02758d62006-05-26 15:21:01 +00001417 i = j = 0;
1418
1419 while (maxsplit-- > 0) {
1420 SKIP_SPACE(s, i, len);
1421 if (i==len) break;
1422 j = i; i++;
1423 SKIP_NONSPACE(s, i, len);
1424 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001426
1427 if (i < len) {
1428 /* Only occurs when maxsplit was reached */
1429 /* Skip any remaining whitespace and copy to end of string */
1430 SKIP_SPACE(s, i, len);
1431 if (i != len)
1432 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001433 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001434 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437 Py_DECREF(list);
1438 return NULL;
1439}
1440
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001441Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001442split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443{
Andrew Dalke525eab32006-05-26 14:00:45 +00001444 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001445 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001446 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447
1448 if (list == NULL)
1449 return NULL;
1450
1451 for (i = j = 0; i < len; ) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001452 /* TODO: Use findchar/memchr for this? */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001453 if (s[i] == ch) {
1454 if (maxcount-- <= 0)
1455 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001456 SPLIT_ADD(s, j, i);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001457 i = j = i + 1;
1458 } else
1459 i++;
1460 }
1461 if (j <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 SPLIT_ADD(s, j, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001464 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001465 return list;
1466
1467 onError:
1468 Py_DECREF(list);
1469 return NULL;
1470}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001472PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473"S.split([sep [,maxsplit]]) -> list of strings\n\
1474\n\
1475Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001476delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001477splits are done. If sep is not specified or is None, any\n\
1478whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479
1480static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001481string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001482{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001483 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001484 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001485 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001486 PyObject *list, *str, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487
Martin v. Löwis9c830762006-04-13 08:37:17 +00001488 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001490 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001491 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001494 if (PyString_Check(subobj)) {
1495 sub = PyString_AS_STRING(subobj);
1496 n = PyString_GET_SIZE(subobj);
1497 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001498#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 else if (PyUnicode_Check(subobj))
1500 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001501#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1503 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001504
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505 if (n == 0) {
1506 PyErr_SetString(PyExc_ValueError, "empty separator");
1507 return NULL;
1508 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001509 else if (n == 1)
1510 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511
Andrew Dalke525eab32006-05-26 14:00:45 +00001512 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513 if (list == NULL)
1514 return NULL;
1515
1516 i = j = 0;
1517 while (i+n <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001518 /* TODO: Use Py_STRING_MATCH */
Fred Drake396f6e02000-06-20 15:47:54 +00001519 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001520 if (maxsplit-- <= 0)
1521 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001522 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524 }
1525 else
1526 i++;
1527 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001528 SPLIT_ADD(s, j, len);
1529 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530 return list;
1531
Andrew Dalke525eab32006-05-26 14:00:45 +00001532 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533 Py_DECREF(list);
1534 return NULL;
1535}
1536
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001537PyDoc_STRVAR(partition__doc__,
1538"S.partition(sep) -> (head, sep, tail)\n\
1539\n\
1540Searches for the separator sep in S, and returns the part before it,\n\
1541the separator itself, and the part after it. If the separator is not\n\
1542found, returns S and two empty strings.");
1543
1544static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001545string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001546{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001547 const char *sep;
1548 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001550 if (PyString_Check(sep_obj)) {
1551 sep = PyString_AS_STRING(sep_obj);
1552 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001553 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001554#ifdef Py_USING_UNICODE
1555 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001556 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001557#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001558 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001559 return NULL;
1560
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001561 return partition(
1562 (PyObject*) self,
1563 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1564 sep_obj, sep, sep_len
1565 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001566}
1567
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001568Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001569rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001570{
Andrew Dalke525eab32006-05-26 14:00:45 +00001571 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001572 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001573 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001574
1575 if (list == NULL)
1576 return NULL;
1577
Andrew Dalke02758d62006-05-26 15:21:01 +00001578 i = j = len-1;
1579
1580 while (maxsplit-- > 0) {
1581 RSKIP_SPACE(s, i);
1582 if (i<0) break;
1583 j = i; i--;
1584 RSKIP_NONSPACE(s, i);
1585 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001586 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001587 if (i >= 0) {
1588 /* Only occurs when maxsplit was reached */
1589 /* Skip any remaining whitespace and copy to beginning of string */
1590 RSKIP_SPACE(s, i);
1591 if (i >= 0)
1592 SPLIT_ADD(s, 0, i + 1);
1593
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001594 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001595 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001596 if (PyList_Reverse(list) < 0)
1597 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001598 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001599 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001600 Py_DECREF(list);
1601 return NULL;
1602}
1603
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001604Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001605rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001606{
Andrew Dalke525eab32006-05-26 14:00:45 +00001607 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001608 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001609 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001610
1611 if (list == NULL)
1612 return NULL;
1613
1614 for (i = j = len - 1; i >= 0; ) {
1615 if (s[i] == ch) {
1616 if (maxcount-- <= 0)
1617 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001618 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001619 j = i = i - 1;
1620 } else
1621 i--;
1622 }
1623 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001624 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001625 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001626 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001627 if (PyList_Reverse(list) < 0)
1628 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629 return list;
1630
1631 onError:
1632 Py_DECREF(list);
1633 return NULL;
1634}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001635
1636PyDoc_STRVAR(rsplit__doc__,
1637"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1638\n\
1639Return a list of the words in the string S, using sep as the\n\
1640delimiter string, starting at the end of the string and working\n\
1641to the front. If maxsplit is given, at most maxsplit splits are\n\
1642done. If sep is not specified or is None, any whitespace string\n\
1643is a separator.");
1644
1645static PyObject *
1646string_rsplit(PyStringObject *self, PyObject *args)
1647{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001648 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001649 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001650 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001651 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001652
Martin v. Löwis9c830762006-04-13 08:37:17 +00001653 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001654 return NULL;
1655 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001656 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657 if (subobj == Py_None)
1658 return rsplit_whitespace(s, len, maxsplit);
1659 if (PyString_Check(subobj)) {
1660 sub = PyString_AS_STRING(subobj);
1661 n = PyString_GET_SIZE(subobj);
1662 }
1663#ifdef Py_USING_UNICODE
1664 else if (PyUnicode_Check(subobj))
1665 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1666#endif
1667 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1668 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001669
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001670 if (n == 0) {
1671 PyErr_SetString(PyExc_ValueError, "empty separator");
1672 return NULL;
1673 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001674 else if (n == 1)
1675 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676
Andrew Dalke525eab32006-05-26 14:00:45 +00001677 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 if (list == NULL)
1679 return NULL;
1680
1681 j = len;
1682 i = j - n;
1683 while (i >= 0) {
1684 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1685 if (maxsplit-- <= 0)
1686 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001687 SPLIT_ADD(s, i+n, j);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001688 j = i;
1689 i -= n;
1690 }
1691 else
1692 i--;
1693 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001694 SPLIT_ADD(s, 0, j);
1695 FIX_PREALLOC_SIZE(list);
1696 if (PyList_Reverse(list) < 0)
1697 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698 return list;
1699
Andrew Dalke525eab32006-05-26 14:00:45 +00001700onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 Py_DECREF(list);
1702 return NULL;
1703}
1704
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001706PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707"S.join(sequence) -> string\n\
1708\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001709Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001710sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711
1712static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001713string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714{
1715 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001716 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001719 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001720 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001721 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001722 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723
Tim Peters19fe14e2001-01-19 03:03:47 +00001724 seq = PySequence_Fast(orig, "");
1725 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001726 return NULL;
1727 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001728
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001729 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001730 if (seqlen == 0) {
1731 Py_DECREF(seq);
1732 return PyString_FromString("");
1733 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001734 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001735 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001736 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1737 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001738 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001739 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001740 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001742
Raymond Hettinger674f2412004-08-23 23:23:54 +00001743 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001744 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001745 * Do a pre-pass to figure out the total amount of space we'll
1746 * need (sz), see whether any argument is absurd, and defer to
1747 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001749 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001750 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001751 item = PySequence_Fast_GET_ITEM(seq, i);
1752 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001753#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001754 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001755 /* Defer to Unicode join.
1756 * CAUTION: There's no gurantee that the
1757 * original sequence can be iterated over
1758 * again, so we must pass seq here.
1759 */
1760 PyObject *result;
1761 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001762 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001763 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001764 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001765#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001767 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001768 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001769 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001770 Py_DECREF(seq);
1771 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001772 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 sz += PyString_GET_SIZE(item);
1774 if (i != 0)
1775 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001776 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001777 PyErr_SetString(PyExc_OverflowError,
1778 "join() is too long for a Python string");
1779 Py_DECREF(seq);
1780 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001782 }
1783
1784 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001785 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001786 if (res == NULL) {
1787 Py_DECREF(seq);
1788 return NULL;
1789 }
1790
1791 /* Catenate everything. */
1792 p = PyString_AS_STRING(res);
1793 for (i = 0; i < seqlen; ++i) {
1794 size_t n;
1795 item = PySequence_Fast_GET_ITEM(seq, i);
1796 n = PyString_GET_SIZE(item);
1797 memcpy(p, PyString_AS_STRING(item), n);
1798 p += n;
1799 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001800 memcpy(p, sep, seplen);
1801 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001802 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001804
Jeremy Hylton49048292000-07-11 03:28:17 +00001805 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807}
1808
Tim Peters52e155e2001-06-16 05:42:57 +00001809PyObject *
1810_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001811{
Tim Petersa7259592001-06-16 05:11:17 +00001812 assert(sep != NULL && PyString_Check(sep));
1813 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001814 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001815}
1816
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001817Py_LOCAL(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001818string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001819{
1820 if (*end > len)
1821 *end = len;
1822 else if (*end < 0)
1823 *end += len;
1824 if (*end < 0)
1825 *end = 0;
1826 if (*start < 0)
1827 *start += len;
1828 if (*start < 0)
1829 *start = 0;
1830}
1831
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001832Py_LOCAL(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001833string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001835 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001836 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001837 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001838 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839
Martin v. Löwis18e16552006-02-15 17:27:45 +00001840 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001841 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001842 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001843 return -2;
1844 if (PyString_Check(subobj)) {
1845 sub = PyString_AS_STRING(subobj);
1846 n = PyString_GET_SIZE(subobj);
1847 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001848#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001849 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001850 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001851#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001852 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853 return -2;
1854
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001855 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001857#ifdef USE_FAST
1858 if (n == 0)
1859 return (dir > 0) ? i : last;
1860 if (dir > 0) {
1861 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1862 FAST_SEARCH);
1863 if (pos < 0)
1864 return pos;
1865 return pos + i;
1866 }
1867#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 if (dir > 0) {
1869 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001870 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 last -= n;
1872 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001873 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 return (long)i;
1875 }
1876 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001877 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001878
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001880 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001882 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001883 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001884 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001885
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886 return -1;
1887}
1888
1889
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001890PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891"S.find(sub [,start [,end]]) -> int\n\
1892\n\
1893Return the lowest index in S where substring sub is found,\n\
1894such that sub is contained within s[start,end]. Optional\n\
1895arguments start and end are interpreted as in slice notation.\n\
1896\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001897Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898
1899static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001900string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001902 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903 if (result == -2)
1904 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906}
1907
1908
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001909PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910"S.index(sub [,start [,end]]) -> int\n\
1911\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001912Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913
1914static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001915string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001917 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918 if (result == -2)
1919 return NULL;
1920 if (result == -1) {
1921 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001922 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923 return NULL;
1924 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001925 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926}
1927
1928
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001929PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930"S.rfind(sub [,start [,end]]) -> int\n\
1931\n\
1932Return the highest index in S where substring sub is found,\n\
1933such that sub is contained within s[start,end]. Optional\n\
1934arguments start and end are interpreted as in slice notation.\n\
1935\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001936Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937
1938static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001939string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001941 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 if (result == -2)
1943 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001944 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949"S.rindex(sub [,start [,end]]) -> int\n\
1950\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001951Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952
1953static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001954string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001956 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957 if (result == -2)
1958 return NULL;
1959 if (result == -1) {
1960 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001961 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962 return NULL;
1963 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001964 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965}
1966
1967
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001968Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1970{
1971 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001972 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001973 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1975 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976
1977 i = 0;
1978 if (striptype != RIGHTSTRIP) {
1979 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1980 i++;
1981 }
1982 }
1983
1984 j = len;
1985 if (striptype != LEFTSTRIP) {
1986 do {
1987 j--;
1988 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1989 j++;
1990 }
1991
1992 if (i == 0 && j == len && PyString_CheckExact(self)) {
1993 Py_INCREF(self);
1994 return (PyObject*)self;
1995 }
1996 else
1997 return PyString_FromStringAndSize(s+i, j-i);
1998}
1999
2000
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002001Py_LOCAL(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002002do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003{
2004 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002005 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007 i = 0;
2008 if (striptype != RIGHTSTRIP) {
2009 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2010 i++;
2011 }
2012 }
2013
2014 j = len;
2015 if (striptype != LEFTSTRIP) {
2016 do {
2017 j--;
2018 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2019 j++;
2020 }
2021
Tim Peters8fa5dd02001-09-12 02:18:30 +00002022 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023 Py_INCREF(self);
2024 return (PyObject*)self;
2025 }
2026 else
2027 return PyString_FromStringAndSize(s+i, j-i);
2028}
2029
2030
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002031Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002032do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2033{
2034 PyObject *sep = NULL;
2035
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002036 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002037 return NULL;
2038
2039 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002040 if (PyString_Check(sep))
2041 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002042#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002043 else if (PyUnicode_Check(sep)) {
2044 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2045 PyObject *res;
2046 if (uniself==NULL)
2047 return NULL;
2048 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2049 striptype, sep);
2050 Py_DECREF(uniself);
2051 return res;
2052 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002053#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002054 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002056 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002058 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002060 STRIPNAME(striptype));
2061 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002062 }
2063
2064 return do_strip(self, striptype);
2065}
2066
2067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002068PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002069"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070\n\
2071Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002072whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002073If chars is given and not None, remove characters in chars instead.\n\
2074If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075
2076static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002077string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079 if (PyTuple_GET_SIZE(args) == 0)
2080 return do_strip(self, BOTHSTRIP); /* Common case */
2081 else
2082 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083}
2084
2085
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002086PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002087"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002089Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002090If chars is given and not None, remove characters in chars instead.\n\
2091If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092
2093static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002094string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096 if (PyTuple_GET_SIZE(args) == 0)
2097 return do_strip(self, LEFTSTRIP); /* Common case */
2098 else
2099 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100}
2101
2102
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002103PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002104"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002106Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002107If chars is given and not None, remove characters in chars instead.\n\
2108If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109
2110static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002111string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113 if (PyTuple_GET_SIZE(args) == 0)
2114 return do_strip(self, RIGHTSTRIP); /* Common case */
2115 else
2116 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117}
2118
2119
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002120PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121"S.lower() -> string\n\
2122\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002123Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002125/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2126#ifndef _tolower
2127#define _tolower tolower
2128#endif
2129
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002131string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002133 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002134 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002135 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002137 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002138 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002140
2141 s = PyString_AS_STRING(newobj);
2142
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002143 memcpy(s, PyString_AS_STRING(self), n);
2144
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002146 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002147 if (isupper(c))
2148 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002150
Anthony Baxtera6286212006-04-11 07:42:36 +00002151 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152}
2153
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002154PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155"S.upper() -> string\n\
2156\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002159#ifndef _toupper
2160#define _toupper toupper
2161#endif
2162
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002164string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002166 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002167 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002168 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002170 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173
2174 s = PyString_AS_STRING(newobj);
2175
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002176 memcpy(s, PyString_AS_STRING(self), n);
2177
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002179 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002180 if (islower(c))
2181 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002183
Anthony Baxtera6286212006-04-11 07:42:36 +00002184 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185}
2186
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002187PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188"S.title() -> string\n\
2189\n\
2190Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002191characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002192
2193static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002194string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195{
2196 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002197 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002198 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002199 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200
Anthony Baxtera6286212006-04-11 07:42:36 +00002201 newobj = PyString_FromStringAndSize(NULL, n);
2202 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002204 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 for (i = 0; i < n; i++) {
2206 int c = Py_CHARMASK(*s++);
2207 if (islower(c)) {
2208 if (!previous_is_cased)
2209 c = toupper(c);
2210 previous_is_cased = 1;
2211 } else if (isupper(c)) {
2212 if (previous_is_cased)
2213 c = tolower(c);
2214 previous_is_cased = 1;
2215 } else
2216 previous_is_cased = 0;
2217 *s_new++ = c;
2218 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002219 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220}
2221
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002222PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223"S.capitalize() -> string\n\
2224\n\
2225Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002226capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227
2228static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002229string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230{
2231 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002232 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002233 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234
Anthony Baxtera6286212006-04-11 07:42:36 +00002235 newobj = PyString_FromStringAndSize(NULL, n);
2236 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002238 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 if (0 < n) {
2240 int c = Py_CHARMASK(*s++);
2241 if (islower(c))
2242 *s_new = toupper(c);
2243 else
2244 *s_new = c;
2245 s_new++;
2246 }
2247 for (i = 1; i < n; i++) {
2248 int c = Py_CHARMASK(*s++);
2249 if (isupper(c))
2250 *s_new = tolower(c);
2251 else
2252 *s_new = c;
2253 s_new++;
2254 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002255 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256}
2257
2258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002259PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260"S.count(sub[, start[, end]]) -> int\n\
2261\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002262Return the number of non-overlapping occurrences of substring sub in\n\
2263string S[start:end]. Optional arguments start and end are interpreted\n\
2264as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265
2266static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002267string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002269 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002270 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002271 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002272 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274
Guido van Rossumc6821402000-05-08 14:08:05 +00002275 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2276 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002278
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 if (PyString_Check(subobj)) {
2280 sub = PyString_AS_STRING(subobj);
2281 n = PyString_GET_SIZE(subobj);
2282 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002283#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002284 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002285 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002286 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2287 if (count == -1)
2288 return NULL;
2289 else
2290 return PyInt_FromLong((long) count);
2291 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002292#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2294 return NULL;
2295
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002296 string_adjust_indices(&i, &last, len);
2297
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298 m = last + 1 - n;
2299 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002300 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301
Fredrik Lundhaf722372006-05-25 17:55:31 +00002302#ifdef USE_FAST
2303 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2304 if (r < 0)
2305 r = 0; /* no match */
2306#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307 r = 0;
2308 while (i < m) {
Fredrik Lundha50d2012006-05-26 17:04:58 +00002309 const char *t;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 if (!memcmp(s+i, sub, n)) {
2311 r++;
2312 i += n;
2313 } else {
2314 i++;
2315 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002316 if (i >= m)
2317 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002318 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002319 if (t == NULL)
2320 break;
2321 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002322 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002323#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002324 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325}
2326
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002327PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328"S.swapcase() -> string\n\
2329\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002330Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002331converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332
2333static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002334string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335{
2336 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002337 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002338 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339
Anthony Baxtera6286212006-04-11 07:42:36 +00002340 newobj = PyString_FromStringAndSize(NULL, n);
2341 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002343 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344 for (i = 0; i < n; i++) {
2345 int c = Py_CHARMASK(*s++);
2346 if (islower(c)) {
2347 *s_new = toupper(c);
2348 }
2349 else if (isupper(c)) {
2350 *s_new = tolower(c);
2351 }
2352 else
2353 *s_new = c;
2354 s_new++;
2355 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002356 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357}
2358
2359
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002360PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361"S.translate(table [,deletechars]) -> string\n\
2362\n\
2363Return a copy of the string S, where all characters occurring\n\
2364in the optional argument deletechars are removed, and the\n\
2365remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002366translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367
2368static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002369string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 register char *input, *output;
2372 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002373 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002375 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002376 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377 PyObject *result;
2378 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002381 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384
2385 if (PyString_Check(tableobj)) {
2386 table1 = PyString_AS_STRING(tableobj);
2387 tablen = PyString_GET_SIZE(tableobj);
2388 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002389#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002391 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392 parameter; instead a mapping to None will cause characters
2393 to be deleted. */
2394 if (delobj != NULL) {
2395 PyErr_SetString(PyExc_TypeError,
2396 "deletions are implemented differently for unicode");
2397 return NULL;
2398 }
2399 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2400 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002401#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404
Martin v. Löwis00b61272002-12-12 20:03:19 +00002405 if (tablen != 256) {
2406 PyErr_SetString(PyExc_ValueError,
2407 "translation table must be 256 characters long");
2408 return NULL;
2409 }
2410
Guido van Rossum4c08d552000-03-10 22:55:18 +00002411 if (delobj != NULL) {
2412 if (PyString_Check(delobj)) {
2413 del_table = PyString_AS_STRING(delobj);
2414 dellen = PyString_GET_SIZE(delobj);
2415 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002416#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 else if (PyUnicode_Check(delobj)) {
2418 PyErr_SetString(PyExc_TypeError,
2419 "deletions are implemented differently for unicode");
2420 return NULL;
2421 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002422#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2424 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425 }
2426 else {
2427 del_table = NULL;
2428 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 }
2430
2431 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002432 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 result = PyString_FromStringAndSize((char *)NULL, inlen);
2434 if (result == NULL)
2435 return NULL;
2436 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002437 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438
2439 if (dellen == 0) {
2440 /* If no deletions are required, use faster code */
2441 for (i = inlen; --i >= 0; ) {
2442 c = Py_CHARMASK(*input++);
2443 if (Py_CHARMASK((*output++ = table[c])) != c)
2444 changed = 1;
2445 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002446 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002447 return result;
2448 Py_DECREF(result);
2449 Py_INCREF(input_obj);
2450 return input_obj;
2451 }
2452
2453 for (i = 0; i < 256; i++)
2454 trans_table[i] = Py_CHARMASK(table[i]);
2455
2456 for (i = 0; i < dellen; i++)
2457 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2458
2459 for (i = inlen; --i >= 0; ) {
2460 c = Py_CHARMASK(*input++);
2461 if (trans_table[c] != -1)
2462 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2463 continue;
2464 changed = 1;
2465 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002466 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002467 Py_DECREF(result);
2468 Py_INCREF(input_obj);
2469 return input_obj;
2470 }
2471 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002472 if (inlen > 0)
2473 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002474 return result;
2475}
2476
2477
/* Search directions accepted by findstring() and countstring() */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* True when the `length' bytes of `target' starting at `offset' equal the
   first `length' bytes of `pattern'.  The first and last bytes are compared
   before the interior is handed to memcmp().
   Don't call if length < 2: the interior length (length-2) would be
   negative.  Arguments are expanded more than once, so pass expressions
   without side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* Pointer to the first occurrence of byte `c' within the first
   `target_len' bytes of `target', or NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2491
2492/* String ops must return a string. */
2493/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002494Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002495return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002496{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002497 if (PyString_CheckExact(self)) {
2498 Py_INCREF(self);
2499 return self;
2500 }
2501 return (PyStringObject *)PyString_FromStringAndSize(
2502 PyString_AS_STRING(self),
2503 PyString_GET_SIZE(self));
2504}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002506Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002507countchar(char *target, int target_len, char c)
2508{
2509 Py_ssize_t count=0;
2510 char *start=target;
2511 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002513 while ( (start=findchar(start, end-start, c)) != NULL ) {
2514 count++;
2515 start += 1;
2516 }
2517
2518 return count;
2519}
2520
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002521Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002522findstring(char *target, Py_ssize_t target_len,
2523 char *pattern, Py_ssize_t pattern_len,
2524 Py_ssize_t start,
2525 Py_ssize_t end,
2526 int direction)
2527{
2528 if (start < 0) {
2529 start += target_len;
2530 if (start < 0)
2531 start = 0;
2532 }
2533 if (end > target_len) {
2534 end = target_len;
2535 } else if (end < 0) {
2536 end += target_len;
2537 if (end < 0)
2538 end = 0;
2539 }
2540
2541 /* zero-length substrings always match at the first attempt */
2542 if (pattern_len == 0)
2543 return (direction > 0) ? start : end;
2544
2545 end -= pattern_len;
2546
2547 if (direction < 0) {
2548 for (; end >= start; end--)
2549 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2550 return end;
2551 } else {
2552 for (; start <= end; start++)
2553 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2554 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002555 }
2556 return -1;
2557}
2558
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002559Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002560countstring(char *target, Py_ssize_t target_len,
2561 char *pattern, Py_ssize_t pattern_len,
2562 Py_ssize_t start,
2563 Py_ssize_t end,
2564 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002566 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002567
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568 if (start < 0) {
2569 start += target_len;
2570 if (start < 0)
2571 start = 0;
2572 }
2573 if (end > target_len) {
2574 end = target_len;
2575 } else if (end < 0) {
2576 end += target_len;
2577 if (end < 0)
2578 end = 0;
2579 }
2580
2581 /* zero-length substrings match everywhere */
2582 if (pattern_len == 0)
2583 return target_len+1;
2584
2585 end -= pattern_len;
2586
2587 if (direction < 0) {
2588 for (; end >= start; end--)
2589 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2590 count++;
2591 end -= pattern_len-1;
2592 }
2593 } else {
2594 for (; start <= end; start++)
2595 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2596 count++;
2597 start += pattern_len-1;
2598 }
2599 }
2600 return count;
2601}
2602
2603
/* Algorithms for different cases of string replacement */
2605
2606/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002607Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002608replace_interleave(PyStringObject *self,
2609 PyStringObject *to,
2610 Py_ssize_t maxcount)
2611{
2612 char *self_s, *to_s, *result_s;
2613 Py_ssize_t self_len, to_len, result_len;
2614 Py_ssize_t count, i, product;
2615 PyStringObject *result;
2616
2617 self_len = PyString_GET_SIZE(self);
2618 to_len = PyString_GET_SIZE(to);
2619
2620 /* 1 at the end plus 1 after every character */
2621 count = self_len+1;
2622 if (maxcount < count)
2623 count = maxcount;
2624
2625 /* Check for overflow */
2626 /* result_len = count * to_len + self_len; */
2627 product = count * to_len;
2628 if (product / to_len != count) {
2629 PyErr_SetString(PyExc_OverflowError,
2630 "replace string is too long");
2631 return NULL;
2632 }
2633 result_len = product + self_len;
2634 if (result_len < 0) {
2635 PyErr_SetString(PyExc_OverflowError,
2636 "replace string is too long");
2637 return NULL;
2638 }
2639
2640 if (! (result = (PyStringObject *)
2641 PyString_FromStringAndSize(NULL, result_len)) )
2642 return NULL;
2643
2644 self_s = PyString_AS_STRING(self);
2645 to_s = PyString_AS_STRING(to);
2646 to_len = PyString_GET_SIZE(to);
2647 result_s = PyString_AS_STRING(result);
2648
2649 /* TODO: special case single character, which doesn't need memcpy */
2650
2651 /* Lay the first one down (guaranteed this will occur) */
2652 memcpy(result_s, to_s, to_len);
2653 result_s += to_len;
2654 count -= 1;
2655
2656 for (i=0; i<count; i++) {
2657 *result_s++ = *self_s++;
2658 memcpy(result_s, to_s, to_len);
2659 result_s += to_len;
2660 }
2661
2662 /* Copy the rest of the original string */
2663 memcpy(result_s, self_s, self_len-i);
2664
2665 return result;
2666}
2667
2668/* Special case for deleting a single character */
2669/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002670Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002671replace_delete_single_character(PyStringObject *self,
2672 char from_c, Py_ssize_t maxcount)
2673{
2674 char *self_s, *result_s;
2675 char *start, *next, *end;
2676 Py_ssize_t self_len, result_len;
2677 Py_ssize_t count;
2678 PyStringObject *result;
2679
2680 self_len = PyString_GET_SIZE(self);
2681 self_s = PyString_AS_STRING(self);
2682
2683 count = countchar(self_s, self_len, from_c);
2684 if (count == 0) {
2685 return return_self(self);
2686 }
2687 if (count > maxcount)
2688 count = maxcount;
2689
2690 result_len = self_len - count; /* from_len == 1 */
2691 assert(result_len>=0);
2692
2693 if ( (result = (PyStringObject *)
2694 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2695 return NULL;
2696 result_s = PyString_AS_STRING(result);
2697
2698 start = self_s;
2699 end = self_s + self_len;
2700 while (count-- > 0) {
2701 next = findchar(start, end-start, from_c);
2702 if (next == NULL)
2703 break;
2704 memcpy(result_s, start, next-start);
2705 result_s += (next-start);
2706 start = next+1;
2707 }
2708 memcpy(result_s, start, end-start);
2709
2710 return result;
2711}
2712
2713/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2714
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002715Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002716replace_delete_substring(PyStringObject *self, PyStringObject *from,
2717 Py_ssize_t maxcount) {
2718 char *self_s, *from_s, *result_s;
2719 char *start, *next, *end;
2720 Py_ssize_t self_len, from_len, result_len;
2721 Py_ssize_t count, offset;
2722 PyStringObject *result;
2723
2724 self_len = PyString_GET_SIZE(self);
2725 self_s = PyString_AS_STRING(self);
2726 from_len = PyString_GET_SIZE(from);
2727 from_s = PyString_AS_STRING(from);
2728
2729 count = countstring(self_s, self_len,
2730 from_s, from_len,
2731 0, self_len, 1);
2732
2733 if (count > maxcount)
2734 count = maxcount;
2735
2736 if (count == 0) {
2737 /* no matches */
2738 return return_self(self);
2739 }
2740
2741 result_len = self_len - (count * from_len);
2742 assert (result_len>=0);
2743
2744 if ( (result = (PyStringObject *)
2745 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2746 return NULL;
2747
2748 result_s = PyString_AS_STRING(result);
2749
2750 start = self_s;
2751 end = self_s + self_len;
2752 while (count-- > 0) {
2753 offset = findstring(start, end-start,
2754 from_s, from_len,
2755 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002756 if (offset == -1)
2757 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 next = start + offset;
2759
2760 memcpy(result_s, start, next-start);
2761
2762 result_s += (next-start);
2763 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002764 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765 memcpy(result_s, start, end-start);
2766 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002767}
2768
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002770Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771replace_single_character_in_place(PyStringObject *self,
2772 char from_c, char to_c,
2773 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002774{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002775 char *self_s, *result_s, *start, *end, *next;
2776 Py_ssize_t self_len;
2777 PyStringObject *result;
2778
2779 /* The result string will be the same size */
2780 self_s = PyString_AS_STRING(self);
2781 self_len = PyString_GET_SIZE(self);
2782
2783 next = findchar(self_s, self_len, from_c);
2784
2785 if (next == NULL) {
2786 /* No matches; return the original string */
2787 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002788 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002789
2790 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002791 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002792 if (result == NULL)
2793 return NULL;
2794 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002795 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002796
2797 /* change everything in-place, starting with this one */
2798 start = result_s + (next-self_s);
2799 *start = to_c;
2800 start++;
2801 end = result_s + self_len;
2802
2803 while (--maxcount > 0) {
2804 next = findchar(start, end-start, from_c);
2805 if (next == NULL)
2806 break;
2807 *next = to_c;
2808 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002809 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810
2811 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002812}
2813
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002815Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816replace_substring_in_place(PyStringObject *self,
2817 PyStringObject *from,
2818 PyStringObject *to,
2819 Py_ssize_t maxcount)
2820{
2821 char *result_s, *start, *end;
2822 char *self_s, *from_s, *to_s;
2823 Py_ssize_t self_len, from_len, offset;
2824 PyStringObject *result;
2825
2826 /* The result string will be the same size */
2827
2828 self_s = PyString_AS_STRING(self);
2829 self_len = PyString_GET_SIZE(self);
2830
2831 from_s = PyString_AS_STRING(from);
2832 from_len = PyString_GET_SIZE(from);
2833 to_s = PyString_AS_STRING(to);
2834
2835 offset = findstring(self_s, self_len,
2836 from_s, from_len,
2837 0, self_len, FORWARD);
2838
2839 if (offset == -1) {
2840 /* No matches; return the original string */
2841 return return_self(self);
2842 }
2843
2844 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002845 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002846 if (result == NULL)
2847 return NULL;
2848 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002849 memcpy(result_s, self_s, self_len);
2850
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002851
2852 /* change everything in-place, starting with this one */
2853 start = result_s + offset;
2854 memcpy(start, to_s, from_len);
2855 start += from_len;
2856 end = result_s + self_len;
2857
2858 while ( --maxcount > 0) {
2859 offset = findstring(start, end-start,
2860 from_s, from_len,
2861 0, end-start, FORWARD);
2862 if (offset==-1)
2863 break;
2864 memcpy(start+offset, to_s, from_len);
2865 start += offset+from_len;
2866 }
2867
2868 return result;
2869}
2870
2871/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002872Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002873replace_single_character(PyStringObject *self,
2874 char from_c,
2875 PyStringObject *to,
2876 Py_ssize_t maxcount)
2877{
2878 char *self_s, *to_s, *result_s;
2879 char *start, *next, *end;
2880 Py_ssize_t self_len, to_len, result_len;
2881 Py_ssize_t count, product;
2882 PyStringObject *result;
2883
2884 self_s = PyString_AS_STRING(self);
2885 self_len = PyString_GET_SIZE(self);
2886
2887 count = countchar(self_s, self_len, from_c);
2888 if (count > maxcount)
2889 count = maxcount;
2890
2891 if (count == 0) {
2892 /* no matches, return unchanged */
2893 return return_self(self);
2894 }
2895
2896 to_s = PyString_AS_STRING(to);
2897 to_len = PyString_GET_SIZE(to);
2898
2899 /* use the difference between current and new, hence the "-1" */
2900 /* result_len = self_len + count * (to_len-1) */
2901 product = count * (to_len-1);
2902 if (product / (to_len-1) != count) {
2903 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2904 return NULL;
2905 }
2906 result_len = self_len + product;
2907 if (result_len < 0) {
2908 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2909 return NULL;
2910 }
2911
2912 if ( (result = (PyStringObject *)
2913 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2914 return NULL;
2915 result_s = PyString_AS_STRING(result);
2916
2917 start = self_s;
2918 end = self_s + self_len;
2919 while (count-- > 0) {
2920 next = findchar(start, end-start, from_c);
2921 if (next == NULL)
2922 break;
2923
2924 if (next == start) {
2925 /* replace with the 'to' */
2926 memcpy(result_s, to_s, to_len);
2927 result_s += to_len;
2928 start += 1;
2929 } else {
2930 /* copy the unchanged old then the 'to' */
2931 memcpy(result_s, start, next-start);
2932 result_s += (next-start);
2933 memcpy(result_s, to_s, to_len);
2934 result_s += to_len;
2935 start = next+1;
2936 }
2937 }
2938 /* Copy the remainder of the remaining string */
2939 memcpy(result_s, start, end-start);
2940
2941 return result;
2942}
2943
2944/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002945Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002946replace_substring(PyStringObject *self,
2947 PyStringObject *from,
2948 PyStringObject *to,
2949 Py_ssize_t maxcount) {
2950 char *self_s, *from_s, *to_s, *result_s;
2951 char *start, *next, *end;
2952 Py_ssize_t self_len, from_len, to_len, result_len;
2953 Py_ssize_t count, offset, product;
2954 PyStringObject *result;
2955
2956 self_s = PyString_AS_STRING(self);
2957 self_len = PyString_GET_SIZE(self);
2958 from_s = PyString_AS_STRING(from);
2959 from_len = PyString_GET_SIZE(from);
2960
2961 count = countstring(self_s, self_len,
2962 from_s, from_len,
2963 0, self_len, FORWARD);
2964 if (count > maxcount)
2965 count = maxcount;
2966
2967 if (count == 0) {
2968 /* no matches, return unchanged */
2969 return return_self(self);
2970 }
2971
2972 to_s = PyString_AS_STRING(to);
2973 to_len = PyString_GET_SIZE(to);
2974
2975 /* Check for overflow */
2976 /* result_len = self_len + count * (to_len-from_len) */
2977 product = count * (to_len-from_len);
2978 if (product / (to_len-from_len) != count) {
2979 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2980 return NULL;
2981 }
2982 result_len = self_len + product;
2983 if (result_len < 0) {
2984 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2985 return NULL;
2986 }
2987
2988 if ( (result = (PyStringObject *)
2989 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2990 return NULL;
2991 result_s = PyString_AS_STRING(result);
2992
2993 start = self_s;
2994 end = self_s + self_len;
2995 while (count-- > 0) {
2996 offset = findstring(start, end-start,
2997 from_s, from_len,
2998 0, end-start, FORWARD);
2999 if (offset == -1)
3000 break;
3001 next = start+offset;
3002 if (next == start) {
3003 /* replace with the 'to' */
3004 memcpy(result_s, to_s, to_len);
3005 result_s += to_len;
3006 start += from_len;
3007 } else {
3008 /* copy the unchanged old then the 'to' */
3009 memcpy(result_s, start, next-start);
3010 result_s += (next-start);
3011 memcpy(result_s, to_s, to_len);
3012 result_s += to_len;
3013 start = next+from_len;
3014 }
3015 }
3016 /* Copy the remainder of the remaining string */
3017 memcpy(result_s, start, end-start);
3018
3019 return result;
3020}
3021
3022
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003023Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003024replace(PyStringObject *self,
3025 PyStringObject *from,
3026 PyStringObject *to,
3027 Py_ssize_t maxcount)
3028{
3029 Py_ssize_t from_len, to_len;
3030
3031 if (maxcount < 0) {
3032 maxcount = PY_SSIZE_T_MAX;
3033 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3034 /* nothing to do; return the original string */
3035 return return_self(self);
3036 }
3037
3038 from_len = PyString_GET_SIZE(from);
3039 to_len = PyString_GET_SIZE(to);
3040
3041 if (maxcount == 0 ||
3042 (from_len == 0 && to_len == 0)) {
3043 /* nothing to do; return the original string */
3044 return return_self(self);
3045 }
3046
3047 /* Handle zero-length special cases */
3048
3049 if (from_len == 0) {
3050 /* insert the 'to' string everywhere. */
3051 /* >>> "Python".replace("", ".") */
3052 /* '.P.y.t.h.o.n.' */
3053 return replace_interleave(self, to, maxcount);
3054 }
3055
3056 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3057 /* point for an empty self string to generate a non-empty string */
3058 /* Special case so the remaining code always gets a non-empty string */
3059 if (PyString_GET_SIZE(self) == 0) {
3060 return return_self(self);
3061 }
3062
3063 if (to_len == 0) {
3064 /* delete all occurances of 'from' string */
3065 if (from_len == 1) {
3066 return replace_delete_single_character(
3067 self, PyString_AS_STRING(from)[0], maxcount);
3068 } else {
3069 return replace_delete_substring(self, from, maxcount);
3070 }
3071 }
3072
3073 /* Handle special case where both strings have the same length */
3074
3075 if (from_len == to_len) {
3076 if (from_len == 1) {
3077 return replace_single_character_in_place(
3078 self,
3079 PyString_AS_STRING(from)[0],
3080 PyString_AS_STRING(to)[0],
3081 maxcount);
3082 } else {
3083 return replace_substring_in_place(
3084 self, from, to, maxcount);
3085 }
3086 }
3087
3088 /* Otherwise use the more generic algorithms */
3089 if (from_len == 1) {
3090 return replace_single_character(self, PyString_AS_STRING(from)[0],
3091 to, maxcount);
3092 } else {
3093 /* len('from')>=2, len('to')>=1 */
3094 return replace_substring(self, from, to, maxcount);
3095 }
3096}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003097
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003098PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003099"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003100\n\
3101Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003102old replaced by new. If the optional argument count is\n\
3103given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003104
3105static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003106string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003107{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003108 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003109 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003110 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003111 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003112
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003113 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003114 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003116 if (PyString_Check(from)) {
3117 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003119#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003120 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003121 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003122 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003123#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003124 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125 return NULL;
3126
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003127 if (PyString_Check(to)) {
3128 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003129 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003130#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003131 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003132 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003133 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003134#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003135 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136 return NULL;
3137
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003138 return (PyObject *)replace((PyStringObject *) self,
3139 (PyStringObject *) from,
3140 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003141}
3142
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003143/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003145PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003146"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003148Return True if S starts with the specified prefix, False otherwise.\n\
3149With optional start, test S beginning at that position.\n\
3150With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003151
3152static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003153string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003154{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003156 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003158 Py_ssize_t plen;
3159 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003160 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003162
Guido van Rossumc6821402000-05-08 14:08:05 +00003163 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3164 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003165 return NULL;
3166 if (PyString_Check(subobj)) {
3167 prefix = PyString_AS_STRING(subobj);
3168 plen = PyString_GET_SIZE(subobj);
3169 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003170#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003171 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003172 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003173 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003174 subobj, start, end, -1);
3175 if (rc == -1)
3176 return NULL;
3177 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003178 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003179 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003180#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003181 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003182 return NULL;
3183
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003184 string_adjust_indices(&start, &end, len);
3185
3186 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003187 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003188
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003189 if (end-start >= plen)
3190 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3191 else
3192 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003193}
3194
3195
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003196PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003197"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003198\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003199Return True if S ends with the specified suffix, False otherwise.\n\
3200With optional start, test S beginning at that position.\n\
3201With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003202
3203static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003204string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003205{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003206 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003207 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003209 Py_ssize_t slen;
3210 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003211 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003212 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213
Guido van Rossumc6821402000-05-08 14:08:05 +00003214 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3215 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 return NULL;
3217 if (PyString_Check(subobj)) {
3218 suffix = PyString_AS_STRING(subobj);
3219 slen = PyString_GET_SIZE(subobj);
3220 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003221#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003222 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003223 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003224 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003225 subobj, start, end, +1);
3226 if (rc == -1)
3227 return NULL;
3228 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003229 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003230 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003231#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003232 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003233 return NULL;
3234
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003235 string_adjust_indices(&start, &end, len);
3236
3237 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003238 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003239
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003240 if (end-slen > start)
3241 start = end - slen;
3242 if (end-start >= slen)
3243 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3244 else
3245 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003246}
3247
3248
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003249PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003250"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003251\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003252Encodes S using the codec registered for encoding. encoding defaults\n\
3253to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003254handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003255a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3256'xmlcharrefreplace' as well as any other name registered with\n\
3257codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003258
3259static PyObject *
3260string_encode(PyStringObject *self, PyObject *args)
3261{
3262 char *encoding = NULL;
3263 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003264 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003265
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003266 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3267 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003268 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003269 if (v == NULL)
3270 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003271 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3272 PyErr_Format(PyExc_TypeError,
3273 "encoder did not return a string/unicode object "
3274 "(type=%.400s)",
3275 v->ob_type->tp_name);
3276 Py_DECREF(v);
3277 return NULL;
3278 }
3279 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003280
3281 onError:
3282 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003283}
3284
3285
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003286PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003287"S.decode([encoding[,errors]]) -> object\n\
3288\n\
3289Decodes S using the codec registered for encoding. encoding defaults\n\
3290to the default encoding. errors may be given to set a different error\n\
3291handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003292a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3293as well as any other name registerd with codecs.register_error that is\n\
3294able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003295
3296static PyObject *
3297string_decode(PyStringObject *self, PyObject *args)
3298{
3299 char *encoding = NULL;
3300 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003301 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003302
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003303 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3304 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003305 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003306 if (v == NULL)
3307 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003308 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3309 PyErr_Format(PyExc_TypeError,
3310 "decoder did not return a string/unicode object "
3311 "(type=%.400s)",
3312 v->ob_type->tp_name);
3313 Py_DECREF(v);
3314 return NULL;
3315 }
3316 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003317
3318 onError:
3319 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003320}
3321
3322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003323PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003324"S.expandtabs([tabsize]) -> string\n\
3325\n\
3326Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003327If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003328
3329static PyObject*
3330string_expandtabs(PyStringObject *self, PyObject *args)
3331{
3332 const char *e, *p;
3333 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003334 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003335 PyObject *u;
3336 int tabsize = 8;
3337
3338 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3339 return NULL;
3340
Thomas Wouters7e474022000-07-16 12:04:32 +00003341 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003342 i = j = 0;
3343 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3344 for (p = PyString_AS_STRING(self); p < e; p++)
3345 if (*p == '\t') {
3346 if (tabsize > 0)
3347 j += tabsize - (j % tabsize);
3348 }
3349 else {
3350 j++;
3351 if (*p == '\n' || *p == '\r') {
3352 i += j;
3353 j = 0;
3354 }
3355 }
3356
3357 /* Second pass: create output string and fill it */
3358 u = PyString_FromStringAndSize(NULL, i + j);
3359 if (!u)
3360 return NULL;
3361
3362 j = 0;
3363 q = PyString_AS_STRING(u);
3364
3365 for (p = PyString_AS_STRING(self); p < e; p++)
3366 if (*p == '\t') {
3367 if (tabsize > 0) {
3368 i = tabsize - (j % tabsize);
3369 j += i;
3370 while (i--)
3371 *q++ = ' ';
3372 }
3373 }
3374 else {
3375 j++;
3376 *q++ = *p;
3377 if (*p == '\n' || *p == '\r')
3378 j = 0;
3379 }
3380
3381 return u;
3382}
3383
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003384Py_LOCAL(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003385pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003386{
3387 PyObject *u;
3388
3389 if (left < 0)
3390 left = 0;
3391 if (right < 0)
3392 right = 0;
3393
Tim Peters8fa5dd02001-09-12 02:18:30 +00003394 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395 Py_INCREF(self);
3396 return (PyObject *)self;
3397 }
3398
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003399 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003400 left + PyString_GET_SIZE(self) + right);
3401 if (u) {
3402 if (left)
3403 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003404 memcpy(PyString_AS_STRING(u) + left,
3405 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406 PyString_GET_SIZE(self));
3407 if (right)
3408 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3409 fill, right);
3410 }
3411
3412 return u;
3413}
3414
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003415PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003416"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003417"\n"
3418"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003419"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003420
3421static PyObject *
3422string_ljust(PyStringObject *self, PyObject *args)
3423{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003424 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003425 char fillchar = ' ';
3426
Thomas Wouters4abb3662006-04-19 14:50:15 +00003427 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003428 return NULL;
3429
Tim Peters8fa5dd02001-09-12 02:18:30 +00003430 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431 Py_INCREF(self);
3432 return (PyObject*) self;
3433 }
3434
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003435 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003436}
3437
3438
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003439PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003440"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003441"\n"
3442"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003443"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444
3445static PyObject *
3446string_rjust(PyStringObject *self, PyObject *args)
3447{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003448 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003449 char fillchar = ' ';
3450
Thomas Wouters4abb3662006-04-19 14:50:15 +00003451 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003452 return NULL;
3453
Tim Peters8fa5dd02001-09-12 02:18:30 +00003454 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003455 Py_INCREF(self);
3456 return (PyObject*) self;
3457 }
3458
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003459 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003460}
3461
3462
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003463PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003464"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003465"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003466"Return S centered in a string of length width. Padding is\n"
3467"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003468
3469static PyObject *
3470string_center(PyStringObject *self, PyObject *args)
3471{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003472 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003473 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003474 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003475
Thomas Wouters4abb3662006-04-19 14:50:15 +00003476 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003477 return NULL;
3478
Tim Peters8fa5dd02001-09-12 02:18:30 +00003479 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003480 Py_INCREF(self);
3481 return (PyObject*) self;
3482 }
3483
3484 marg = width - PyString_GET_SIZE(self);
3485 left = marg / 2 + (marg & width & 1);
3486
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003487 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003488}
3489
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003490PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003491"S.zfill(width) -> string\n"
3492"\n"
3493"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003494"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003495
3496static PyObject *
3497string_zfill(PyStringObject *self, PyObject *args)
3498{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003499 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003500 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003501 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003502 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003503
Thomas Wouters4abb3662006-04-19 14:50:15 +00003504 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003505 return NULL;
3506
3507 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003508 if (PyString_CheckExact(self)) {
3509 Py_INCREF(self);
3510 return (PyObject*) self;
3511 }
3512 else
3513 return PyString_FromStringAndSize(
3514 PyString_AS_STRING(self),
3515 PyString_GET_SIZE(self)
3516 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003517 }
3518
3519 fill = width - PyString_GET_SIZE(self);
3520
3521 s = pad(self, fill, 0, '0');
3522
3523 if (s == NULL)
3524 return NULL;
3525
3526 p = PyString_AS_STRING(s);
3527 if (p[fill] == '+' || p[fill] == '-') {
3528 /* move sign to beginning of string */
3529 p[0] = p[fill];
3530 p[fill] = '0';
3531 }
3532
3533 return (PyObject*) s;
3534}
3535
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003536PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003537"S.isspace() -> bool\n\
3538\n\
3539Return True if all characters in S are whitespace\n\
3540and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003541
3542static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003543string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544{
Fred Drakeba096332000-07-09 07:04:36 +00003545 register const unsigned char *p
3546 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003547 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003548
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549 /* Shortcut for single character strings */
3550 if (PyString_GET_SIZE(self) == 1 &&
3551 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003552 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003553
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003554 /* Special case for empty strings */
3555 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003556 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003557
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558 e = p + PyString_GET_SIZE(self);
3559 for (; p < e; p++) {
3560 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003561 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003562 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003563 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003564}
3565
3566
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003567PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003568"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003569\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003570Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003571and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003572
3573static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003574string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003575{
Fred Drakeba096332000-07-09 07:04:36 +00003576 register const unsigned char *p
3577 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003578 register const unsigned char *e;
3579
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003580 /* Shortcut for single character strings */
3581 if (PyString_GET_SIZE(self) == 1 &&
3582 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003583 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584
3585 /* Special case for empty strings */
3586 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003587 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003588
3589 e = p + PyString_GET_SIZE(self);
3590 for (; p < e; p++) {
3591 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003592 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003594 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003595}
3596
3597
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003598PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003599"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003600\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003601Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003602and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003603
3604static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003605string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003606{
Fred Drakeba096332000-07-09 07:04:36 +00003607 register const unsigned char *p
3608 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003609 register const unsigned char *e;
3610
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003611 /* Shortcut for single character strings */
3612 if (PyString_GET_SIZE(self) == 1 &&
3613 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003615
3616 /* Special case for empty strings */
3617 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003619
3620 e = p + PyString_GET_SIZE(self);
3621 for (; p < e; p++) {
3622 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003623 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003624 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003625 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003626}
3627
3628
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003629PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003631\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003632Return True if all characters in S are digits\n\
3633and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634
3635static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003636string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003637{
Fred Drakeba096332000-07-09 07:04:36 +00003638 register const unsigned char *p
3639 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003640 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642 /* Shortcut for single character strings */
3643 if (PyString_GET_SIZE(self) == 1 &&
3644 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003645 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003647 /* Special case for empty strings */
3648 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003650
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651 e = p + PyString_GET_SIZE(self);
3652 for (; p < e; p++) {
3653 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003654 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657}
3658
3659
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003660PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003663Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003664at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665
3666static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003667string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668{
Fred Drakeba096332000-07-09 07:04:36 +00003669 register const unsigned char *p
3670 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003671 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672 int cased;
3673
Guido van Rossum4c08d552000-03-10 22:55:18 +00003674 /* Shortcut for single character strings */
3675 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003678 /* Special case for empty strings */
3679 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003681
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682 e = p + PyString_GET_SIZE(self);
3683 cased = 0;
3684 for (; p < e; p++) {
3685 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003686 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687 else if (!cased && islower(*p))
3688 cased = 1;
3689 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003690 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691}
3692
3693
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003694PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003695"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003697Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003698at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699
3700static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003701string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702{
Fred Drakeba096332000-07-09 07:04:36 +00003703 register const unsigned char *p
3704 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003705 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706 int cased;
3707
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708 /* Shortcut for single character strings */
3709 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003712 /* Special case for empty strings */
3713 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003714 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003715
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716 e = p + PyString_GET_SIZE(self);
3717 cased = 0;
3718 for (; p < e; p++) {
3719 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721 else if (!cased && isupper(*p))
3722 cased = 1;
3723 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725}
3726
3727
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003728PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003729"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003731Return True if S is a titlecased string and there is at least one\n\
3732character in S, i.e. uppercase characters may only follow uncased\n\
3733characters and lowercase characters only cased ones. Return False\n\
3734otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735
3736static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003737string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738{
Fred Drakeba096332000-07-09 07:04:36 +00003739 register const unsigned char *p
3740 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003741 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742 int cased, previous_is_cased;
3743
Guido van Rossum4c08d552000-03-10 22:55:18 +00003744 /* Shortcut for single character strings */
3745 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003746 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003748 /* Special case for empty strings */
3749 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003750 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003751
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752 e = p + PyString_GET_SIZE(self);
3753 cased = 0;
3754 previous_is_cased = 0;
3755 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003756 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757
3758 if (isupper(ch)) {
3759 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003760 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761 previous_is_cased = 1;
3762 cased = 1;
3763 }
3764 else if (islower(ch)) {
3765 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003766 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767 previous_is_cased = 1;
3768 cased = 1;
3769 }
3770 else
3771 previous_is_cased = 0;
3772 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003773 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774}
3775
3776
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003777PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003778"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779\n\
3780Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003781Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003782is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784static PyObject*
3785string_splitlines(PyStringObject *self, PyObject *args)
3786{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003787 register Py_ssize_t i;
3788 register Py_ssize_t j;
3789 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003790 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003791 PyObject *list;
3792 PyObject *str;
3793 char *data;
3794
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003795 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796 return NULL;
3797
3798 data = PyString_AS_STRING(self);
3799 len = PyString_GET_SIZE(self);
3800
Guido van Rossum4c08d552000-03-10 22:55:18 +00003801 list = PyList_New(0);
3802 if (!list)
3803 goto onError;
3804
3805 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003806 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003807
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808 /* Find a line and append it */
3809 while (i < len && data[i] != '\n' && data[i] != '\r')
3810 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811
3812 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003813 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003814 if (i < len) {
3815 if (data[i] == '\r' && i + 1 < len &&
3816 data[i+1] == '\n')
3817 i += 2;
3818 else
3819 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003820 if (keepends)
3821 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003822 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003823 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003824 j = i;
3825 }
3826 if (j < len) {
3827 SPLIT_APPEND(data, j, len);
3828 }
3829
3830 return list;
3831
3832 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003833 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003834 return NULL;
3835}
3836
3837#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003838#undef SPLIT_ADD
3839#undef MAX_PREALLOC
3840#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003841
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003842static PyObject *
3843string_getnewargs(PyStringObject *v)
3844{
3845 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3846}
3847
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003848
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003849static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003850string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003851 /* Counterparts of the obsolete stropmodule functions; except
3852 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003853 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3854 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003855 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003856 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3857 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003858 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3859 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3860 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3861 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3862 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3863 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3864 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003865 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3866 capitalize__doc__},
3867 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3868 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3869 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003870 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003871 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3872 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3873 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3874 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3875 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3876 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3877 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3878 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3879 startswith__doc__},
3880 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3881 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3882 swapcase__doc__},
3883 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3884 translate__doc__},
3885 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3886 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3887 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3888 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3889 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3890 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3891 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3892 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3893 expandtabs__doc__},
3894 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3895 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003896 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003897 {NULL, NULL} /* sentinel */
3898};
3899
Jeremy Hylton938ace62002-07-17 16:30:39 +00003900static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003901str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3902
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003903static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003904string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003905{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003906 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003907 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003908
Guido van Rossumae960af2001-08-30 03:11:59 +00003909 if (type != &PyString_Type)
3910 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003911 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3912 return NULL;
3913 if (x == NULL)
3914 return PyString_FromString("");
3915 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003916}
3917
Guido van Rossumae960af2001-08-30 03:11:59 +00003918static PyObject *
3919str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3920{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003921 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003922 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003923
3924 assert(PyType_IsSubtype(type, &PyString_Type));
3925 tmp = string_new(&PyString_Type, args, kwds);
3926 if (tmp == NULL)
3927 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003928 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003929 n = PyString_GET_SIZE(tmp);
3930 pnew = type->tp_alloc(type, n);
3931 if (pnew != NULL) {
3932 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003933 ((PyStringObject *)pnew)->ob_shash =
3934 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003935 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003936 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003937 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003938 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003939}
3940
Guido van Rossumcacfc072002-05-24 19:01:59 +00003941static PyObject *
3942basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3943{
3944 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003945 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003946 return NULL;
3947}
3948
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003949static PyObject *
3950string_mod(PyObject *v, PyObject *w)
3951{
3952 if (!PyString_Check(v)) {
3953 Py_INCREF(Py_NotImplemented);
3954 return Py_NotImplemented;
3955 }
3956 return PyString_Format(v, w);
3957}
3958
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003959PyDoc_STRVAR(basestring_doc,
3960"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003961
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003962static PyNumberMethods string_as_number = {
3963 0, /*nb_add*/
3964 0, /*nb_subtract*/
3965 0, /*nb_multiply*/
3966 0, /*nb_divide*/
3967 string_mod, /*nb_remainder*/
3968};
3969
3970
Guido van Rossumcacfc072002-05-24 19:01:59 +00003971PyTypeObject PyBaseString_Type = {
3972 PyObject_HEAD_INIT(&PyType_Type)
3973 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003974 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003975 0,
3976 0,
3977 0, /* tp_dealloc */
3978 0, /* tp_print */
3979 0, /* tp_getattr */
3980 0, /* tp_setattr */
3981 0, /* tp_compare */
3982 0, /* tp_repr */
3983 0, /* tp_as_number */
3984 0, /* tp_as_sequence */
3985 0, /* tp_as_mapping */
3986 0, /* tp_hash */
3987 0, /* tp_call */
3988 0, /* tp_str */
3989 0, /* tp_getattro */
3990 0, /* tp_setattro */
3991 0, /* tp_as_buffer */
3992 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3993 basestring_doc, /* tp_doc */
3994 0, /* tp_traverse */
3995 0, /* tp_clear */
3996 0, /* tp_richcompare */
3997 0, /* tp_weaklistoffset */
3998 0, /* tp_iter */
3999 0, /* tp_iternext */
4000 0, /* tp_methods */
4001 0, /* tp_members */
4002 0, /* tp_getset */
4003 &PyBaseObject_Type, /* tp_base */
4004 0, /* tp_dict */
4005 0, /* tp_descr_get */
4006 0, /* tp_descr_set */
4007 0, /* tp_dictoffset */
4008 0, /* tp_init */
4009 0, /* tp_alloc */
4010 basestring_new, /* tp_new */
4011 0, /* tp_free */
4012};
4013
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004014PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004015"str(object) -> string\n\
4016\n\
4017Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004018If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004019
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004020PyTypeObject PyString_Type = {
4021 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004022 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004023 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004024 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004025 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004026 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004027 (printfunc)string_print, /* tp_print */
4028 0, /* tp_getattr */
4029 0, /* tp_setattr */
4030 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004031 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004032 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004033 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004034 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004035 (hashfunc)string_hash, /* tp_hash */
4036 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004037 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004038 PyObject_GenericGetAttr, /* tp_getattro */
4039 0, /* tp_setattro */
4040 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004041 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004042 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004043 string_doc, /* tp_doc */
4044 0, /* tp_traverse */
4045 0, /* tp_clear */
4046 (richcmpfunc)string_richcompare, /* tp_richcompare */
4047 0, /* tp_weaklistoffset */
4048 0, /* tp_iter */
4049 0, /* tp_iternext */
4050 string_methods, /* tp_methods */
4051 0, /* tp_members */
4052 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004053 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004054 0, /* tp_dict */
4055 0, /* tp_descr_get */
4056 0, /* tp_descr_set */
4057 0, /* tp_dictoffset */
4058 0, /* tp_init */
4059 0, /* tp_alloc */
4060 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004061 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004062};
4063
4064void
Fred Drakeba096332000-07-09 07:04:36 +00004065PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004066{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004067 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004068 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004069 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004070 if (w == NULL || !PyString_Check(*pv)) {
4071 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004072 *pv = NULL;
4073 return;
4074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004075 v = string_concat((PyStringObject *) *pv, w);
4076 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004077 *pv = v;
4078}
4079
Guido van Rossum013142a1994-08-30 08:19:36 +00004080void
Fred Drakeba096332000-07-09 07:04:36 +00004081PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004082{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 PyString_Concat(pv, w);
4084 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004085}
4086
4087
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004088/* The following function breaks the notion that strings are immutable:
4089 it changes the size of a string. We get away with this only if there
4090 is only one module referencing the object. You can also think of it
4091 as creating a new string object and destroying the old one, only
4092 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004093 already be known to some other part of the code...
4094 Note that if there's not enough memory to resize the string, the original
4095 string object at *pv is deallocated, *pv is set to NULL, an "out of
4096 memory" exception is set, and -1 is returned. Else (on success) 0 is
4097 returned, and the value in *pv may or may not be the same as on input.
4098 As always, an extra byte is allocated for a trailing \0 byte (newsize
4099 does *not* include that), and a trailing \0 byte is stored.
4100*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004101
4102int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004103_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004104{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004105 register PyObject *v;
4106 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004107 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004108 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4109 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004110 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004111 Py_DECREF(v);
4112 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004113 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004114 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004115 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004116 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004117 _Py_ForgetReference(v);
4118 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004119 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004120 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004121 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004122 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004123 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004124 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004125 _Py_NewReference(*pv);
4126 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004127 sv->ob_size = newsize;
4128 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004129 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004130 return 0;
4131}
Guido van Rossume5372401993-03-16 12:15:04 +00004132
4133/* Helpers for formatstring */
4134
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004135Py_LOCAL(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004136getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004137{
Thomas Wouters977485d2006-02-16 15:59:12 +00004138 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004139 if (argidx < arglen) {
4140 (*p_argidx)++;
4141 if (arglen < 0)
4142 return args;
4143 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004144 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004145 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004146 PyErr_SetString(PyExc_TypeError,
4147 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004148 return NULL;
4149}
4150
/* Format flag bits, one per flag character of a conversion spec. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4163
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004164Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004165formatfloat(char *buf, size_t buflen, int flags,
4166 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004167{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004168 /* fmt = '%#.' + `prec` + `type`
4169 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004170 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004171 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004172 x = PyFloat_AsDouble(v);
4173 if (x == -1.0 && PyErr_Occurred()) {
4174 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004175 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004176 }
Guido van Rossume5372401993-03-16 12:15:04 +00004177 if (prec < 0)
4178 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004179 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4180 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004181 /* Worst case length calc to ensure no buffer overrun:
4182
4183 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004184 fmt = %#.<prec>g
4185 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004186 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004187 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004188
4189 'f' formats:
4190 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4191 len = 1 + 50 + 1 + prec = 52 + prec
4192
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004193 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004194 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004195
4196 */
4197 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4198 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004199 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004200 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004201 return -1;
4202 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004203 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4204 (flags&F_ALT) ? "#" : "",
4205 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004206 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004207 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004208}
4209
Tim Peters38fd5b62000-09-21 05:43:11 +00004210/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4211 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4212 * Python's regular ints.
4213 * Return value: a new PyString*, or NULL if error.
4214 * . *pbuf is set to point into it,
4215 * *plen set to the # of chars following that.
4216 * Caller must decref it when done using pbuf.
4217 * The string starting at *pbuf is of the form
4218 * "-"? ("0x" | "0X")? digit+
4219 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004220 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004221 * There will be at least prec digits, zero-filled on the left if
4222 * necessary to get that many.
4223 * val object to be converted
4224 * flags bitmask of format flags; only F_ALT is looked at
4225 * prec minimum number of digits; 0-fill on left if needed
4226 * type a character in [duoxX]; u acts the same as d
4227 *
4228 * CAUTION: o, x and X conversions on regular ints can never
4229 * produce a '-' sign, but can for Python's unbounded ints.
4230 */
4231PyObject*
4232_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4233 char **pbuf, int *plen)
4234{
4235 PyObject *result = NULL;
4236 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004237 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004238 int sign; /* 1 if '-', else 0 */
4239 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004240 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004241 int numdigits; /* len == numnondigits + numdigits */
4242 int numnondigits = 0;
4243
4244 switch (type) {
4245 case 'd':
4246 case 'u':
4247 result = val->ob_type->tp_str(val);
4248 break;
4249 case 'o':
4250 result = val->ob_type->tp_as_number->nb_oct(val);
4251 break;
4252 case 'x':
4253 case 'X':
4254 numnondigits = 2;
4255 result = val->ob_type->tp_as_number->nb_hex(val);
4256 break;
4257 default:
4258 assert(!"'type' not in [duoxX]");
4259 }
4260 if (!result)
4261 return NULL;
4262
4263 /* To modify the string in-place, there can only be one reference. */
4264 if (result->ob_refcnt != 1) {
4265 PyErr_BadInternalCall();
4266 return NULL;
4267 }
4268 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004269 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004270 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004271 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4272 return NULL;
4273 }
4274 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004275 if (buf[len-1] == 'L') {
4276 --len;
4277 buf[len] = '\0';
4278 }
4279 sign = buf[0] == '-';
4280 numnondigits += sign;
4281 numdigits = len - numnondigits;
4282 assert(numdigits > 0);
4283
Tim Petersfff53252001-04-12 18:38:48 +00004284 /* Get rid of base marker unless F_ALT */
4285 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004286 /* Need to skip 0x, 0X or 0. */
4287 int skipped = 0;
4288 switch (type) {
4289 case 'o':
4290 assert(buf[sign] == '0');
4291 /* If 0 is only digit, leave it alone. */
4292 if (numdigits > 1) {
4293 skipped = 1;
4294 --numdigits;
4295 }
4296 break;
4297 case 'x':
4298 case 'X':
4299 assert(buf[sign] == '0');
4300 assert(buf[sign + 1] == 'x');
4301 skipped = 2;
4302 numnondigits -= 2;
4303 break;
4304 }
4305 if (skipped) {
4306 buf += skipped;
4307 len -= skipped;
4308 if (sign)
4309 buf[0] = '-';
4310 }
4311 assert(len == numnondigits + numdigits);
4312 assert(numdigits > 0);
4313 }
4314
4315 /* Fill with leading zeroes to meet minimum width. */
4316 if (prec > numdigits) {
4317 PyObject *r1 = PyString_FromStringAndSize(NULL,
4318 numnondigits + prec);
4319 char *b1;
4320 if (!r1) {
4321 Py_DECREF(result);
4322 return NULL;
4323 }
4324 b1 = PyString_AS_STRING(r1);
4325 for (i = 0; i < numnondigits; ++i)
4326 *b1++ = *buf++;
4327 for (i = 0; i < prec - numdigits; i++)
4328 *b1++ = '0';
4329 for (i = 0; i < numdigits; i++)
4330 *b1++ = *buf++;
4331 *b1 = '\0';
4332 Py_DECREF(result);
4333 result = r1;
4334 buf = PyString_AS_STRING(result);
4335 len = numnondigits + prec;
4336 }
4337
4338 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004339 if (type == 'X') {
4340 /* Need to convert all lower case letters to upper case.
4341 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004342 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004343 if (buf[i] >= 'a' && buf[i] <= 'x')
4344 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004345 }
4346 *pbuf = buf;
4347 *plen = len;
4348 return result;
4349}
4350
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004351Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004352formatint(char *buf, size_t buflen, int flags,
4353 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004354{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004355 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004356 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4357 + 1 + 1 = 24 */
4358 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004359 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004360 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004361
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004362 x = PyInt_AsLong(v);
4363 if (x == -1 && PyErr_Occurred()) {
4364 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004365 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004366 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004367 if (x < 0 && type == 'u') {
4368 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004369 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004370 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4371 sign = "-";
4372 else
4373 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004374 if (prec < 0)
4375 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004376
4377 if ((flags & F_ALT) &&
4378 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004379 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004380 * of issues that cause pain:
4381 * - when 0 is being converted, the C standard leaves off
4382 * the '0x' or '0X', which is inconsistent with other
4383 * %#x/%#X conversions and inconsistent with Python's
4384 * hex() function
4385 * - there are platforms that violate the standard and
4386 * convert 0 with the '0x' or '0X'
4387 * (Metrowerks, Compaq Tru64)
4388 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004389 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004390 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004391 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004392 * We can achieve the desired consistency by inserting our
4393 * own '0x' or '0X' prefix, and substituting %x/%X in place
4394 * of %#x/%#X.
4395 *
4396 * Note that this is the same approach as used in
4397 * formatint() in unicodeobject.c
4398 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004399 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4400 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004401 }
4402 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004403 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4404 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004405 prec, type);
4406 }
4407
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004408 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4409 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004410 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004411 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004412 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004413 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004414 return -1;
4415 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004416 if (sign[0])
4417 PyOS_snprintf(buf, buflen, fmt, -x);
4418 else
4419 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004420 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004421}
4422
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004423Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004424formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004425{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004426 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004427 if (PyString_Check(v)) {
4428 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004429 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004430 }
4431 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004432 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004433 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004434 }
4435 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004436 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004437}
4438
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast cannot be
   re-associated by surrounding operators (e.g. sizeof FORMATBUFLEN).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004448
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004449PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004450PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004451{
4452 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004453 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004454 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004455 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004456 PyObject *result, *orig_args;
4457#ifdef Py_USING_UNICODE
4458 PyObject *v, *w;
4459#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004460 PyObject *dict = NULL;
4461 if (format == NULL || !PyString_Check(format) || args == NULL) {
4462 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004463 return NULL;
4464 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004465 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004466 fmt = PyString_AS_STRING(format);
4467 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004468 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004469 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004470 if (result == NULL)
4471 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004472 res = PyString_AsString(result);
4473 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004474 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004475 argidx = 0;
4476 }
4477 else {
4478 arglen = -1;
4479 argidx = -2;
4480 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004481 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4482 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004483 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004484 while (--fmtcnt >= 0) {
4485 if (*fmt != '%') {
4486 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004487 rescnt = fmtcnt + 100;
4488 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004490 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004491 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004492 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004493 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004494 }
4495 *res++ = *fmt++;
4496 }
4497 else {
4498 /* Got a format specifier */
4499 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004500 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004501 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004502 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004503 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004504 PyObject *v = NULL;
4505 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004506 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004507 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004508 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004509 char formatbuf[FORMATBUFLEN];
4510 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004511#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004512 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004513 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004514#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004515
Guido van Rossumda9c2711996-12-05 21:58:58 +00004516 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004517 if (*fmt == '(') {
4518 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004519 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004521 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004522
4523 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004524 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004525 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004526 goto error;
4527 }
4528 ++fmt;
4529 --fmtcnt;
4530 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004531 /* Skip over balanced parentheses */
4532 while (pcount > 0 && --fmtcnt >= 0) {
4533 if (*fmt == ')')
4534 --pcount;
4535 else if (*fmt == '(')
4536 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004537 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004538 }
4539 keylen = fmt - keystart - 1;
4540 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004541 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004542 "incomplete format key");
4543 goto error;
4544 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 key = PyString_FromStringAndSize(keystart,
4546 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004547 if (key == NULL)
4548 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004549 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004550 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004551 args_owned = 0;
4552 }
4553 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004554 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004555 if (args == NULL) {
4556 goto error;
4557 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004558 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004559 arglen = -1;
4560 argidx = -2;
4561 }
Guido van Rossume5372401993-03-16 12:15:04 +00004562 while (--fmtcnt >= 0) {
4563 switch (c = *fmt++) {
4564 case '-': flags |= F_LJUST; continue;
4565 case '+': flags |= F_SIGN; continue;
4566 case ' ': flags |= F_BLANK; continue;
4567 case '#': flags |= F_ALT; continue;
4568 case '0': flags |= F_ZERO; continue;
4569 }
4570 break;
4571 }
4572 if (c == '*') {
4573 v = getnextarg(args, arglen, &argidx);
4574 if (v == NULL)
4575 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004576 if (!PyInt_Check(v)) {
4577 PyErr_SetString(PyExc_TypeError,
4578 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004579 goto error;
4580 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004581 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004582 if (width < 0) {
4583 flags |= F_LJUST;
4584 width = -width;
4585 }
Guido van Rossume5372401993-03-16 12:15:04 +00004586 if (--fmtcnt >= 0)
4587 c = *fmt++;
4588 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004589 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004590 width = c - '0';
4591 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004592 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004593 if (!isdigit(c))
4594 break;
4595 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004596 PyErr_SetString(
4597 PyExc_ValueError,
4598 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004599 goto error;
4600 }
4601 width = width*10 + (c - '0');
4602 }
4603 }
4604 if (c == '.') {
4605 prec = 0;
4606 if (--fmtcnt >= 0)
4607 c = *fmt++;
4608 if (c == '*') {
4609 v = getnextarg(args, arglen, &argidx);
4610 if (v == NULL)
4611 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004612 if (!PyInt_Check(v)) {
4613 PyErr_SetString(
4614 PyExc_TypeError,
4615 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004616 goto error;
4617 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004618 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004619 if (prec < 0)
4620 prec = 0;
4621 if (--fmtcnt >= 0)
4622 c = *fmt++;
4623 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004624 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004625 prec = c - '0';
4626 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004627 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004628 if (!isdigit(c))
4629 break;
4630 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004631 PyErr_SetString(
4632 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004633 "prec too big");
4634 goto error;
4635 }
4636 prec = prec*10 + (c - '0');
4637 }
4638 }
4639 } /* prec */
4640 if (fmtcnt >= 0) {
4641 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004642 if (--fmtcnt >= 0)
4643 c = *fmt++;
4644 }
4645 }
4646 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004647 PyErr_SetString(PyExc_ValueError,
4648 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004649 goto error;
4650 }
4651 if (c != '%') {
4652 v = getnextarg(args, arglen, &argidx);
4653 if (v == NULL)
4654 goto error;
4655 }
4656 sign = 0;
4657 fill = ' ';
4658 switch (c) {
4659 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004660 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004661 len = 1;
4662 break;
4663 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004664#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004665 if (PyUnicode_Check(v)) {
4666 fmt = fmt_start;
4667 argidx = argidx_start;
4668 goto unicode;
4669 }
Georg Brandld45014b2005-10-01 17:06:00 +00004670#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004671 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004672#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004673 if (temp != NULL && PyUnicode_Check(temp)) {
4674 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004675 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004676 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004677 goto unicode;
4678 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004679#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004680 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004681 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004682 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004683 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004684 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004685 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004686 if (!PyString_Check(temp)) {
4687 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004688 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004689 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004690 goto error;
4691 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004692 pbuf = PyString_AS_STRING(temp);
4693 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004694 if (prec >= 0 && len > prec)
4695 len = prec;
4696 break;
4697 case 'i':
4698 case 'd':
4699 case 'u':
4700 case 'o':
4701 case 'x':
4702 case 'X':
4703 if (c == 'i')
4704 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004705 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004706 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004707 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004708 prec, c, &pbuf, &ilen);
4709 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004710 if (!temp)
4711 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004712 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004713 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004714 else {
4715 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004716 len = formatint(pbuf,
4717 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004718 flags, prec, c, v);
4719 if (len < 0)
4720 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004721 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004722 }
4723 if (flags & F_ZERO)
4724 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004725 break;
4726 case 'e':
4727 case 'E':
4728 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004729 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004730 case 'g':
4731 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004732 if (c == 'F')
4733 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004734 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004735 len = formatfloat(pbuf, sizeof(formatbuf),
4736 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004737 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004738 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004739 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004740 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004741 fill = '0';
4742 break;
4743 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004744#ifdef Py_USING_UNICODE
4745 if (PyUnicode_Check(v)) {
4746 fmt = fmt_start;
4747 argidx = argidx_start;
4748 goto unicode;
4749 }
4750#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004751 pbuf = formatbuf;
4752 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004753 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004754 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004755 break;
4756 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004757 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004758 "unsupported format character '%c' (0x%x) "
4759 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004760 c, c,
4761 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004762 goto error;
4763 }
4764 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004765 if (*pbuf == '-' || *pbuf == '+') {
4766 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004767 len--;
4768 }
4769 else if (flags & F_SIGN)
4770 sign = '+';
4771 else if (flags & F_BLANK)
4772 sign = ' ';
4773 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004774 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004775 }
4776 if (width < len)
4777 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004778 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004779 reslen -= rescnt;
4780 rescnt = width + fmtcnt + 100;
4781 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004782 if (reslen < 0) {
4783 Py_DECREF(result);
4784 return PyErr_NoMemory();
4785 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004786 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004787 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004788 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004789 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004790 }
4791 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004792 if (fill != ' ')
4793 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004794 rescnt--;
4795 if (width > len)
4796 width--;
4797 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004798 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4799 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004800 assert(pbuf[1] == c);
4801 if (fill != ' ') {
4802 *res++ = *pbuf++;
4803 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004804 }
Tim Petersfff53252001-04-12 18:38:48 +00004805 rescnt -= 2;
4806 width -= 2;
4807 if (width < 0)
4808 width = 0;
4809 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004810 }
4811 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004812 do {
4813 --rescnt;
4814 *res++ = fill;
4815 } while (--width > len);
4816 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004817 if (fill == ' ') {
4818 if (sign)
4819 *res++ = sign;
4820 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004821 (c == 'x' || c == 'X')) {
4822 assert(pbuf[0] == '0');
4823 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004824 *res++ = *pbuf++;
4825 *res++ = *pbuf++;
4826 }
4827 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004828 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004829 res += len;
4830 rescnt -= len;
4831 while (--width >= len) {
4832 --rescnt;
4833 *res++ = ' ';
4834 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004835 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004836 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004837 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004838 goto error;
4839 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004840 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004841 } /* '%' */
4842 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004843 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004844 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004845 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004846 goto error;
4847 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004848 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004849 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004850 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004851 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004852 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004853
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004854#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004855 unicode:
4856 if (args_owned) {
4857 Py_DECREF(args);
4858 args_owned = 0;
4859 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004860 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004861 if (PyTuple_Check(orig_args) && argidx > 0) {
4862 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004863 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004864 v = PyTuple_New(n);
4865 if (v == NULL)
4866 goto error;
4867 while (--n >= 0) {
4868 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4869 Py_INCREF(w);
4870 PyTuple_SET_ITEM(v, n, w);
4871 }
4872 args = v;
4873 } else {
4874 Py_INCREF(orig_args);
4875 args = orig_args;
4876 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004877 args_owned = 1;
4878 /* Take what we have of the result and let the Unicode formatting
4879 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004880 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004881 if (_PyString_Resize(&result, rescnt))
4882 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004883 fmtcnt = PyString_GET_SIZE(format) - \
4884 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004885 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4886 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004887 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004888 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004889 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004890 if (v == NULL)
4891 goto error;
4892 /* Paste what we have (result) to what the Unicode formatting
4893 function returned (v) and return the result (or error) */
4894 w = PyUnicode_Concat(result, v);
4895 Py_DECREF(result);
4896 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004897 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004898 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004899#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004900
Guido van Rossume5372401993-03-16 12:15:04 +00004901 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004902 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004903 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004904 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004905 }
Guido van Rossume5372401993-03-16 12:15:04 +00004906 return NULL;
4907}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004908
Guido van Rossum2a61e741997-01-18 07:55:05 +00004909void
Fred Drakeba096332000-07-09 07:04:36 +00004910PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004911{
4912 register PyStringObject *s = (PyStringObject *)(*p);
4913 PyObject *t;
4914 if (s == NULL || !PyString_Check(s))
4915 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004916 /* If it's a string subclass, we don't really know what putting
4917 it in the interned dict might do. */
4918 if (!PyString_CheckExact(s))
4919 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004920 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004921 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004922 if (interned == NULL) {
4923 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004924 if (interned == NULL) {
4925 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004926 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004927 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004928 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004929 t = PyDict_GetItem(interned, (PyObject *)s);
4930 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004931 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004932 Py_DECREF(*p);
4933 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004934 return;
4935 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004936
Armin Rigo79f7ad22004-08-07 19:27:39 +00004937 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004938 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004939 return;
4940 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004941 /* The two references in interned are not counted by refcnt.
4942 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004943 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004944 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004945}
4946
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004947void
4948PyString_InternImmortal(PyObject **p)
4949{
4950 PyString_InternInPlace(p);
4951 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4952 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4953 Py_INCREF(*p);
4954 }
4955}
4956
Guido van Rossum2a61e741997-01-18 07:55:05 +00004957
4958PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004959PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004960{
4961 PyObject *s = PyString_FromString(cp);
4962 if (s == NULL)
4963 return NULL;
4964 PyString_InternInPlace(&s);
4965 return s;
4966}
4967
Guido van Rossum8cf04761997-08-02 02:57:45 +00004968void
Fred Drakeba096332000-07-09 07:04:36 +00004969PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004970{
4971 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004972 for (i = 0; i < UCHAR_MAX + 1; i++) {
4973 Py_XDECREF(characters[i]);
4974 characters[i] = NULL;
4975 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004976 Py_XDECREF(nullstring);
4977 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004978}
Barry Warsawa903ad982001-02-23 16:40:48 +00004979
Barry Warsawa903ad982001-02-23 16:40:48 +00004980void _Py_ReleaseInternedStrings(void)
4981{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004982 PyObject *keys;
4983 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004984 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004985
4986 if (interned == NULL || !PyDict_Check(interned))
4987 return;
4988 keys = PyDict_Keys(interned);
4989 if (keys == NULL || !PyList_Check(keys)) {
4990 PyErr_Clear();
4991 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004992 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004993
4994 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4995 detector, interned strings are not forcibly deallocated; rather, we
4996 give them their stolen references back, and then clear and DECREF
4997 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004998
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004999 fprintf(stderr, "releasing interned strings\n");
5000 n = PyList_GET_SIZE(keys);
5001 for (i = 0; i < n; i++) {
5002 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5003 switch (s->ob_sstate) {
5004 case SSTATE_NOT_INTERNED:
5005 /* XXX Shouldn't happen */
5006 break;
5007 case SSTATE_INTERNED_IMMORTAL:
5008 s->ob_refcnt += 1;
5009 break;
5010 case SSTATE_INTERNED_MORTAL:
5011 s->ob_refcnt += 2;
5012 break;
5013 default:
5014 Py_FatalError("Inconsistent interned string state.");
5015 }
5016 s->ob_sstate = SSTATE_NOT_INTERNED;
5017 }
5018 Py_DECREF(keys);
5019 PyDict_Clear(interned);
5020 Py_DECREF(interned);
5021 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005022}