blob: 3790bfa3c803ff2ab23fd96d48045417571a27c8 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
26
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Tim Peters8931ff12006-05-13 23:28:20 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000275 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694string_getsize(register PyObject *op)
695{
696 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (PyString_AsStringAndSize(op, &s, &len))
699 return -1;
700 return len;
701}
702
703static /*const*/ char *
704string_getbuffer(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return NULL;
710 return s;
711}
712
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000714PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (!PyString_Check(op))
717 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000718 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719}
720
721/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000722PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 if (!PyString_Check(op))
725 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727}
728
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729int
730PyString_AsStringAndSize(register PyObject *obj,
731 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000732 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733{
734 if (s == NULL) {
735 PyErr_BadInternalCall();
736 return -1;
737 }
738
739 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000740#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 if (PyUnicode_Check(obj)) {
742 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
743 if (obj == NULL)
744 return -1;
745 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000746 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000747#endif
748 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_Format(PyExc_TypeError,
750 "expected string or Unicode object, "
751 "%.200s found", obj->ob_type->tp_name);
752 return -1;
753 }
754 }
755
756 *s = PyString_AS_STRING(obj);
757 if (len != NULL)
758 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000759 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_SetString(PyExc_TypeError,
761 "expected string without null bytes");
762 return -1;
763 }
764 return 0;
765}
766
Fredrik Lundhaf722372006-05-25 17:55:31 +0000767/* -------------------------------------------------------------------- */
Fredrik Lundha50d2012006-05-26 17:04:58 +0000768/* stringlib components */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769
Fredrik Lundha50d2012006-05-26 17:04:58 +0000770#define USE_FAST
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#ifdef USE_FAST
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#define STRINGLIB_CHAR char
Fredrik Lundhb9479482006-05-26 17:22:38 +0000775#define STRINGLIB_NEW PyString_FromStringAndSize
776#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000777
Fredrik Lundha50d2012006-05-26 17:04:58 +0000778#include "stringlib/fastsearch.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000779#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#endif
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
783/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000784/* Methods */
785
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000786static int
Fred Drakeba096332000-07-09 07:04:36 +0000787string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000789 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000792
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000794 if (! PyString_CheckExact(op)) {
795 int ret;
796 /* A str subclass may have its own __str__ method. */
797 op = (PyStringObject *) PyObject_Str((PyObject *)op);
798 if (op == NULL)
799 return -1;
800 ret = string_print(op, fp, flags);
801 Py_DECREF(op);
802 return ret;
803 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000804 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000805#ifdef __VMS
806 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
807#else
808 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
809#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000810 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000812
Thomas Wouters7e474022000-07-16 12:04:32 +0000813 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000815 if (memchr(op->ob_sval, '\'', op->ob_size) &&
816 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '"';
818
819 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000820 for (i = 0; i < op->ob_size; i++) {
821 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000822 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000823 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000824 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000825 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000826 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000827 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000828 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000829 fprintf(fp, "\\r");
830 else if (c < ' ' || c >= 0x7f)
831 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000832 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000833 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000836 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837}
838
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000839PyObject *
840PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000842 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000843 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000844 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000845 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000846 PyErr_SetString(PyExc_OverflowError,
847 "string is too large to make repr");
848 }
849 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000851 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 }
853 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000854 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 register char c;
856 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000857 int quote;
858
Thomas Wouters7e474022000-07-16 12:04:32 +0000859 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000861 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000862 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000863 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000864 quote = '"';
865
Tim Peters9161c8b2001-12-03 01:55:38 +0000866 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000867 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000868 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000869 /* There's at least enough room for a hex escape
870 and a closing quote. */
871 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000873 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000875 else if (c == '\t')
876 *p++ = '\\', *p++ = 't';
877 else if (c == '\n')
878 *p++ = '\\', *p++ = 'n';
879 else if (c == '\r')
880 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000881 else if (c < ' ' || c >= 0x7f) {
882 /* For performance, we don't want to call
883 PyOS_snprintf here (extra layers of
884 function call). */
885 sprintf(p, "\\x%02x", c & 0xff);
886 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000887 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000888 else
889 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000891 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000892 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000895 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000896 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898}
899
Guido van Rossum189f1df2001-05-01 16:51:53 +0000900static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000901string_repr(PyObject *op)
902{
903 return PyString_Repr(op, 1);
904}
905
906static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000907string_str(PyObject *s)
908{
Tim Petersc9933152001-10-16 20:18:24 +0000909 assert(PyString_Check(s));
910 if (PyString_CheckExact(s)) {
911 Py_INCREF(s);
912 return s;
913 }
914 else {
915 /* Subtype -- return genuine string with the same value. */
916 PyStringObject *t = (PyStringObject *) s;
917 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
918 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000919}
920
Martin v. Löwis18e16552006-02-15 17:27:45 +0000921static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000922string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923{
924 return a->ob_size;
925}
926
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000927static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000928string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929{
Andrew Dalke598710c2006-05-25 18:18:39 +0000930 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 register PyStringObject *op;
932 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000933#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000934 if (PyUnicode_Check(bb))
935 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000936#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000937 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000938 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000939 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 return NULL;
941 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000942#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000944 if ((a->ob_size == 0 || b->ob_size == 0) &&
945 PyString_CheckExact(a) && PyString_CheckExact(b)) {
946 if (a->ob_size == 0) {
947 Py_INCREF(bb);
948 return bb;
949 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000950 Py_INCREF(a);
951 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000952 }
953 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000954 if (size < 0) {
955 PyErr_SetString(PyExc_OverflowError,
956 "strings are too large to concat");
957 return NULL;
958 }
959
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000960 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000961 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000962 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000963 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000964 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000965 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000966 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000967 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
968 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000969 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971#undef b
972}
973
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000975string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000976{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000977 register Py_ssize_t i;
978 register Py_ssize_t j;
979 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000981 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000982 if (n < 0)
983 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000984 /* watch out for overflows: the size can overflow int,
985 * and the # of bytes needed can overflow size_t
986 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000988 if (n && size / n != a->ob_size) {
989 PyErr_SetString(PyExc_OverflowError,
990 "repeated string is too long");
991 return NULL;
992 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000993 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 Py_INCREF(a);
995 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 }
Tim Peterse7c05322004-06-27 17:24:49 +0000997 nbytes = (size_t)size;
998 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000999 PyErr_SetString(PyExc_OverflowError,
1000 "repeated string is too long");
1001 return NULL;
1002 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001003 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001004 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001005 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001007 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001008 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001009 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001010 op->ob_sval[size] = '\0';
1011 if (a->ob_size == 1 && n > 0) {
1012 memset(op->ob_sval, a->ob_sval[0] , n);
1013 return (PyObject *) op;
1014 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001015 i = 0;
1016 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001017 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1018 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001019 }
1020 while (i < size) {
1021 j = (i <= size-i) ? i : size-i;
1022 memcpy(op->ob_sval+i, op->ob_sval, j);
1023 i += j;
1024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001025 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001026}
1027
1028/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1029
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001030static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001031string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001032 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001033 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001034{
1035 if (i < 0)
1036 i = 0;
1037 if (j < 0)
1038 j = 0; /* Avoid signed/unsigned bug in next line */
1039 if (j > a->ob_size)
1040 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001041 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1042 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043 Py_INCREF(a);
1044 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045 }
1046 if (j < i)
1047 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001048 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001049}
1050
Guido van Rossum9284a572000-03-07 15:53:43 +00001051static int
Fred Drakeba096332000-07-09 07:04:36 +00001052string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001053{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001054 char *s = PyString_AS_STRING(a);
1055 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001056 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001057#ifdef USE_FAST
1058 Py_ssize_t pos;
1059#else
1060 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001061 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001062 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001063#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001064
1065 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001066#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001067 if (PyUnicode_Check(el))
1068 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001069#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001070 if (!PyString_Check(el)) {
1071 PyErr_SetString(PyExc_TypeError,
1072 "'in <string>' requires string as left operand");
1073 return -1;
1074 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001075 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001076
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001077 if (len_sub == 0)
1078 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001079
1080#ifdef USE_FAST
1081 pos = fastsearch(
1082 s, PyString_GET_SIZE(a),
1083 sub, len_sub, FAST_SEARCH
1084 );
1085 return (pos != -1);
1086#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001087 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001088 substring. When s<last, there is still room for a possible match
1089 and s[0] through s[len_sub-1] will be in bounds.
1090 shortsub is len_sub minus the last character which is checked
1091 separately just before the memcmp(). That check helps prevent
1092 false starts and saves the setup time for memcmp().
1093 */
1094 firstchar = sub[0];
1095 shortsub = len_sub - 1;
1096 lastchar = sub[shortsub];
1097 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1098 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001099 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001100 if (s == NULL)
1101 return 0;
1102 assert(s < last);
1103 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001104 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001105 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001106 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001107#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001108 return 0;
1109}
1110
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001111static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001113{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001114 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001115 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001116 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001117 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001118 return NULL;
1119 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001120 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001121 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001122 if (v == NULL)
1123 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001124 else {
1125#ifdef COUNT_ALLOCS
1126 one_strings++;
1127#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001128 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001129 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001130 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001131}
1132
Martin v. Löwiscd353062001-05-24 16:56:35 +00001133static PyObject*
1134string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001135{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001136 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001137 Py_ssize_t len_a, len_b;
1138 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001139 PyObject *result;
1140
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001143 result = Py_NotImplemented;
1144 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001145 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001146 if (a == b) {
1147 switch (op) {
1148 case Py_EQ:case Py_LE:case Py_GE:
1149 result = Py_True;
1150 goto out;
1151 case Py_NE:case Py_LT:case Py_GT:
1152 result = Py_False;
1153 goto out;
1154 }
1155 }
1156 if (op == Py_EQ) {
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (a->ob_size == b->ob_size
1160 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001161 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001162 a->ob_size) == 0)) {
1163 result = Py_True;
1164 } else {
1165 result = Py_False;
1166 }
1167 goto out;
1168 }
1169 len_a = a->ob_size; len_b = b->ob_size;
1170 min_len = (len_a < len_b) ? len_a : len_b;
1171 if (min_len > 0) {
1172 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1173 if (c==0)
1174 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1175 }else
1176 c = 0;
1177 if (c == 0)
1178 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1179 switch (op) {
1180 case Py_LT: c = c < 0; break;
1181 case Py_LE: c = c <= 0; break;
1182 case Py_EQ: assert(0); break; /* unreachable */
1183 case Py_NE: c = c != 0; break;
1184 case Py_GT: c = c > 0; break;
1185 case Py_GE: c = c >= 0; break;
1186 default:
1187 result = Py_NotImplemented;
1188 goto out;
1189 }
1190 result = c ? Py_True : Py_False;
1191 out:
1192 Py_INCREF(result);
1193 return result;
1194}
1195
1196int
1197_PyString_Eq(PyObject *o1, PyObject *o2)
1198{
1199 PyStringObject *a, *b;
1200 a = (PyStringObject*)o1;
1201 b = (PyStringObject*)o2;
1202 return a->ob_size == b->ob_size
1203 && *a->ob_sval == *b->ob_sval
1204 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001205}
1206
Guido van Rossum9bfef441993-03-29 10:43:31 +00001207static long
Fred Drakeba096332000-07-09 07:04:36 +00001208string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001209{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001210 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001211 register unsigned char *p;
1212 register long x;
1213
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001214 if (a->ob_shash != -1)
1215 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001216 len = a->ob_size;
1217 p = (unsigned char *) a->ob_sval;
1218 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001219 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001220 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001221 x ^= a->ob_size;
1222 if (x == -1)
1223 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001224 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001225 return x;
1226}
1227
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001228#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1229
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230static PyObject*
1231string_subscript(PyStringObject* self, PyObject* item)
1232{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001233 PyNumberMethods *nb = item->ob_type->tp_as_number;
1234 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1235 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001236 if (i == -1 && PyErr_Occurred())
1237 return NULL;
1238 if (i < 0)
1239 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001240 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001241 }
1242 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001243 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 char* source_buf;
1245 char* result_buf;
1246 PyObject* result;
1247
Tim Petersae1d0c92006-03-17 03:29:34 +00001248 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001249 PyString_GET_SIZE(self),
1250 &start, &stop, &step, &slicelength) < 0) {
1251 return NULL;
1252 }
1253
1254 if (slicelength <= 0) {
1255 return PyString_FromStringAndSize("", 0);
1256 }
1257 else {
1258 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001259 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001260 if (result_buf == NULL)
1261 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001262
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 cur += step, i++) {
1265 result_buf[i] = source_buf[cur];
1266 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001267
1268 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001269 slicelength);
1270 PyMem_Free(result_buf);
1271 return result;
1272 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001273 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001274 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001275 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001276 "string indices must be integers");
1277 return NULL;
1278 }
1279}
1280
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281static Py_ssize_t
1282string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001283{
1284 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001285 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001286 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001287 return -1;
1288 }
1289 *ptr = (void *)self->ob_sval;
1290 return self->ob_size;
1291}
1292
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293static Py_ssize_t
1294string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001295{
Guido van Rossum045e6881997-09-08 18:30:11 +00001296 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001297 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001298 return -1;
1299}
1300
Martin v. Löwis18e16552006-02-15 17:27:45 +00001301static Py_ssize_t
1302string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001303{
1304 if ( lenp )
1305 *lenp = self->ob_size;
1306 return 1;
1307}
1308
Martin v. Löwis18e16552006-02-15 17:27:45 +00001309static Py_ssize_t
1310string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001311{
1312 if ( index != 0 ) {
1313 PyErr_SetString(PyExc_SystemError,
1314 "accessing non-existent string segment");
1315 return -1;
1316 }
1317 *ptr = self->ob_sval;
1318 return self->ob_size;
1319}
1320
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001321static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001322 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001323 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 (ssizeargfunc)string_repeat, /*sq_repeat*/
1325 (ssizeargfunc)string_item, /*sq_item*/
1326 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001327 0, /*sq_ass_item*/
1328 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001329 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001330};
1331
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001332static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001333 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001334 (binaryfunc)string_subscript,
1335 0,
1336};
1337
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001338static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001339 (readbufferproc)string_buffer_getreadbuf,
1340 (writebufferproc)string_buffer_getwritebuf,
1341 (segcountproc)string_buffer_getsegcount,
1342 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001343};
1344
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345
1346
/* Strip modes; they double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above.  Each entry is a "|O:<method>" format string
   (presumably for PyArg_ParseTuple-style argument parsing -- the users
   are outside this excerpt). */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for strip mode i: skips the 3-character "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001355
Andrew Dalke525eab32006-05-26 14:00:45 +00001356
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that an expression
   argument (e.g. a conditional expression) is evaluated as a unit.  It is
   evaluated twice, so it must not have side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1369
/* Helper macros for the split routines.

   SPLIT_APPEND and SPLIT_ADD rely on names in the calling function: the
   variables `str` and `list`, the label `onError`, and (SPLIT_ADD and
   FIX_PREALLOC_SIZE only) the counter `count`.

   Macro arguments are parenthesized wherever they are used as values so
   that expression arguments keep their meaning.  The `i` argument of the
   SKIP macros is modified and must be a plain lvalue. */

/* Append data[left:right] to `list` as a new string.  NOTE: expands to
   several statements, so use it only as a complete statement inside a
   braced block, never as the unbraced body of an if/else or loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Add data[left:right] to `list` and bump `count`.  The first
   MAX_PREALLOC items are stored directly into list slots (the slot takes
   over the reference); later items are appended. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;

/* Advance (SKIP_*) or retreat (RSKIP_*) index i over whitespace or
   non-whitespace bytes of s, staying within [0, len). */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1406
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001407Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001408split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409{
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001412 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413
1414 if (list == NULL)
1415 return NULL;
1416
Andrew Dalke02758d62006-05-26 15:21:01 +00001417 i = j = 0;
1418
1419 while (maxsplit-- > 0) {
1420 SKIP_SPACE(s, i, len);
1421 if (i==len) break;
1422 j = i; i++;
1423 SKIP_NONSPACE(s, i, len);
1424 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001426
1427 if (i < len) {
1428 /* Only occurs when maxsplit was reached */
1429 /* Skip any remaining whitespace and copy to end of string */
1430 SKIP_SPACE(s, i, len);
1431 if (i != len)
1432 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001433 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001434 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437 Py_DECREF(list);
1438 return NULL;
1439}
1440
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001441Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001442split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443{
Andrew Dalke525eab32006-05-26 14:00:45 +00001444 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001445 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001446 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447
1448 if (list == NULL)
1449 return NULL;
1450
1451 for (i = j = 0; i < len; ) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001452 /* TODO: Use findchar/memchr for this? */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001453 if (s[i] == ch) {
1454 if (maxcount-- <= 0)
1455 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001456 SPLIT_ADD(s, j, i);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001457 i = j = i + 1;
1458 } else
1459 i++;
1460 }
1461 if (j <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 SPLIT_ADD(s, j, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001464 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001465 return list;
1466
1467 onError:
1468 Py_DECREF(list);
1469 return NULL;
1470}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001472PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473"S.split([sep [,maxsplit]]) -> list of strings\n\
1474\n\
1475Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001476delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001477splits are done. If sep is not specified or is None, any\n\
1478whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479
1480static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001481string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001482{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001483 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001484 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001485 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001486 PyObject *list, *str, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487
Martin v. Löwis9c830762006-04-13 08:37:17 +00001488 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001490 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001491 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001494 if (PyString_Check(subobj)) {
1495 sub = PyString_AS_STRING(subobj);
1496 n = PyString_GET_SIZE(subobj);
1497 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001498#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 else if (PyUnicode_Check(subobj))
1500 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001501#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1503 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001504
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505 if (n == 0) {
1506 PyErr_SetString(PyExc_ValueError, "empty separator");
1507 return NULL;
1508 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001509 else if (n == 1)
1510 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511
Andrew Dalke525eab32006-05-26 14:00:45 +00001512 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513 if (list == NULL)
1514 return NULL;
1515
1516 i = j = 0;
1517 while (i+n <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001518 /* TODO: Use Py_STRING_MATCH */
Fred Drake396f6e02000-06-20 15:47:54 +00001519 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001520 if (maxsplit-- <= 0)
1521 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001522 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524 }
1525 else
1526 i++;
1527 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001528 SPLIT_ADD(s, j, len);
1529 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530 return list;
1531
Andrew Dalke525eab32006-05-26 14:00:45 +00001532 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533 Py_DECREF(list);
1534 return NULL;
1535}
1536
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001537PyDoc_STRVAR(partition__doc__,
1538"S.partition(sep) -> (head, sep, tail)\n\
1539\n\
1540Searches for the separator sep in S, and returns the part before it,\n\
1541the separator itself, and the part after it. If the separator is not\n\
1542found, returns S and two empty strings.");
1543
1544static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001545string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001546{
Fredrik Lundhb9479482006-05-26 17:22:38 +00001547 Py_ssize_t str_len = PyString_GET_SIZE(self), sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001548 const char *str = PyString_AS_STRING(self), *sep;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001550 if (PyString_Check(sep_obj)) {
1551 sep = PyString_AS_STRING(sep_obj);
1552 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001553 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001554#ifdef Py_USING_UNICODE
1555 else if (PyUnicode_Check(sep_obj))
1556 return PyUnicode_Partition((PyObject *)self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001557#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001558 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001559 return NULL;
1560
Fredrik Lundhb9479482006-05-26 17:22:38 +00001561 return partition((PyObject*)self, str, str_len, sep_obj, sep, sep_len);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001562}
1563
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001564Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001565rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001566{
Andrew Dalke525eab32006-05-26 14:00:45 +00001567 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001568 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001569 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001570
1571 if (list == NULL)
1572 return NULL;
1573
Andrew Dalke02758d62006-05-26 15:21:01 +00001574 i = j = len-1;
1575
1576 while (maxsplit-- > 0) {
1577 RSKIP_SPACE(s, i);
1578 if (i<0) break;
1579 j = i; i--;
1580 RSKIP_NONSPACE(s, i);
1581 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001582 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001583 if (i >= 0) {
1584 /* Only occurs when maxsplit was reached */
1585 /* Skip any remaining whitespace and copy to beginning of string */
1586 RSKIP_SPACE(s, i);
1587 if (i >= 0)
1588 SPLIT_ADD(s, 0, i + 1);
1589
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001591 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001592 if (PyList_Reverse(list) < 0)
1593 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001594 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001595 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001596 Py_DECREF(list);
1597 return NULL;
1598}
1599
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001600Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001601rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001602{
Andrew Dalke525eab32006-05-26 14:00:45 +00001603 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001604 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001605 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001606
1607 if (list == NULL)
1608 return NULL;
1609
1610 for (i = j = len - 1; i >= 0; ) {
1611 if (s[i] == ch) {
1612 if (maxcount-- <= 0)
1613 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001614 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001615 j = i = i - 1;
1616 } else
1617 i--;
1618 }
1619 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001620 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001621 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001622 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001623 if (PyList_Reverse(list) < 0)
1624 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001625 return list;
1626
1627 onError:
1628 Py_DECREF(list);
1629 return NULL;
1630}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001631
1632PyDoc_STRVAR(rsplit__doc__,
1633"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1634\n\
1635Return a list of the words in the string S, using sep as the\n\
1636delimiter string, starting at the end of the string and working\n\
1637to the front. If maxsplit is given, at most maxsplit splits are\n\
1638done. If sep is not specified or is None, any whitespace string\n\
1639is a separator.");
1640
1641static PyObject *
1642string_rsplit(PyStringObject *self, PyObject *args)
1643{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001644 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001645 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001646 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001647 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001648
Martin v. Löwis9c830762006-04-13 08:37:17 +00001649 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001650 return NULL;
1651 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001652 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001653 if (subobj == Py_None)
1654 return rsplit_whitespace(s, len, maxsplit);
1655 if (PyString_Check(subobj)) {
1656 sub = PyString_AS_STRING(subobj);
1657 n = PyString_GET_SIZE(subobj);
1658 }
1659#ifdef Py_USING_UNICODE
1660 else if (PyUnicode_Check(subobj))
1661 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1662#endif
1663 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1664 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001665
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666 if (n == 0) {
1667 PyErr_SetString(PyExc_ValueError, "empty separator");
1668 return NULL;
1669 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001670 else if (n == 1)
1671 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001672
Andrew Dalke525eab32006-05-26 14:00:45 +00001673 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 if (list == NULL)
1675 return NULL;
1676
1677 j = len;
1678 i = j - n;
1679 while (i >= 0) {
1680 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1681 if (maxsplit-- <= 0)
1682 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001683 SPLIT_ADD(s, i+n, j);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001684 j = i;
1685 i -= n;
1686 }
1687 else
1688 i--;
1689 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001690 SPLIT_ADD(s, 0, j);
1691 FIX_PREALLOC_SIZE(list);
1692 if (PyList_Reverse(list) < 0)
1693 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 return list;
1695
Andrew Dalke525eab32006-05-26 14:00:45 +00001696onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001697 Py_DECREF(list);
1698 return NULL;
1699}
1700
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001701
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001702PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703"S.join(sequence) -> string\n\
1704\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001705Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001706sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707
1708static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001709string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710{
1711 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001712 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001715 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001716 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001717 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001718 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719
Tim Peters19fe14e2001-01-19 03:03:47 +00001720 seq = PySequence_Fast(orig, "");
1721 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001722 return NULL;
1723 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001724
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001725 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001726 if (seqlen == 0) {
1727 Py_DECREF(seq);
1728 return PyString_FromString("");
1729 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001731 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001732 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1733 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001734 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001735 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001736 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001738
Raymond Hettinger674f2412004-08-23 23:23:54 +00001739 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001740 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001741 * Do a pre-pass to figure out the total amount of space we'll
1742 * need (sz), see whether any argument is absurd, and defer to
1743 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001745 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001746 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001747 item = PySequence_Fast_GET_ITEM(seq, i);
1748 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001749#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001750 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001751 /* Defer to Unicode join.
1752 * CAUTION: There's no gurantee that the
1753 * original sequence can be iterated over
1754 * again, so we must pass seq here.
1755 */
1756 PyObject *result;
1757 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001758 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001759 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001760 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001761#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001762 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001763 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001764 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001765 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001766 Py_DECREF(seq);
1767 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001768 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001769 sz += PyString_GET_SIZE(item);
1770 if (i != 0)
1771 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001772 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 PyErr_SetString(PyExc_OverflowError,
1774 "join() is too long for a Python string");
1775 Py_DECREF(seq);
1776 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001778 }
1779
1780 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001781 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001782 if (res == NULL) {
1783 Py_DECREF(seq);
1784 return NULL;
1785 }
1786
1787 /* Catenate everything. */
1788 p = PyString_AS_STRING(res);
1789 for (i = 0; i < seqlen; ++i) {
1790 size_t n;
1791 item = PySequence_Fast_GET_ITEM(seq, i);
1792 n = PyString_GET_SIZE(item);
1793 memcpy(p, PyString_AS_STRING(item), n);
1794 p += n;
1795 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001796 memcpy(p, sep, seplen);
1797 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001798 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001800
Jeremy Hylton49048292000-07-11 03:28:17 +00001801 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803}
1804
Tim Peters52e155e2001-06-16 05:42:57 +00001805PyObject *
1806_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001807{
Tim Petersa7259592001-06-16 05:11:17 +00001808 assert(sep != NULL && PyString_Check(sep));
1809 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001810 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001811}
1812
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001813Py_LOCAL(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001814string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001815{
1816 if (*end > len)
1817 *end = len;
1818 else if (*end < 0)
1819 *end += len;
1820 if (*end < 0)
1821 *end = 0;
1822 if (*start < 0)
1823 *start += len;
1824 if (*start < 0)
1825 *start = 0;
1826}
1827
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001828Py_LOCAL(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001829string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001831 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001832 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001833 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001834 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835
Martin v. Löwis18e16552006-02-15 17:27:45 +00001836 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001837 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001838 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001839 return -2;
1840 if (PyString_Check(subobj)) {
1841 sub = PyString_AS_STRING(subobj);
1842 n = PyString_GET_SIZE(subobj);
1843 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001844#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001845 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001846 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001847#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001848 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 return -2;
1850
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001851 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001853#ifdef USE_FAST
1854 if (n == 0)
1855 return (dir > 0) ? i : last;
1856 if (dir > 0) {
1857 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1858 FAST_SEARCH);
1859 if (pos < 0)
1860 return pos;
1861 return pos + i;
1862 }
1863#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864 if (dir > 0) {
1865 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001867 last -= n;
1868 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001869 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 return (long)i;
1871 }
1872 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001873 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001874
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001876 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001878 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001879 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001880 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001881
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882 return -1;
1883}
1884
1885
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001886PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887"S.find(sub [,start [,end]]) -> int\n\
1888\n\
1889Return the lowest index in S where substring sub is found,\n\
1890such that sub is contained within s[start,end]. Optional\n\
1891arguments start and end are interpreted as in slice notation.\n\
1892\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001893Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894
1895static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001896string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001898 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899 if (result == -2)
1900 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001901 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902}
1903
1904
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001905PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906"S.index(sub [,start [,end]]) -> int\n\
1907\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001908Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909
1910static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001911string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001913 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914 if (result == -2)
1915 return NULL;
1916 if (result == -1) {
1917 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001918 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919 return NULL;
1920 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001921 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922}
1923
1924
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001925PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926"S.rfind(sub [,start [,end]]) -> int\n\
1927\n\
1928Return the highest index in S where substring sub is found,\n\
1929such that sub is contained within s[start,end]. Optional\n\
1930arguments start and end are interpreted as in slice notation.\n\
1931\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001932Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933
1934static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001935string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001937 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938 if (result == -2)
1939 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001940 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941}
1942
1943
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001944PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945"S.rindex(sub [,start [,end]]) -> int\n\
1946\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001947Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948
1949static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001950string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001952 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 if (result == -2)
1954 return NULL;
1955 if (result == -1) {
1956 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001957 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958 return NULL;
1959 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001960 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961}
1962
1963
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001964Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001965do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1966{
1967 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001968 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001970 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1971 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001972
1973 i = 0;
1974 if (striptype != RIGHTSTRIP) {
1975 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1976 i++;
1977 }
1978 }
1979
1980 j = len;
1981 if (striptype != LEFTSTRIP) {
1982 do {
1983 j--;
1984 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1985 j++;
1986 }
1987
1988 if (i == 0 && j == len && PyString_CheckExact(self)) {
1989 Py_INCREF(self);
1990 return (PyObject*)self;
1991 }
1992 else
1993 return PyString_FromStringAndSize(s+i, j-i);
1994}
1995
1996
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001997Py_LOCAL(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001998do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999{
2000 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002001 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003 i = 0;
2004 if (striptype != RIGHTSTRIP) {
2005 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2006 i++;
2007 }
2008 }
2009
2010 j = len;
2011 if (striptype != LEFTSTRIP) {
2012 do {
2013 j--;
2014 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2015 j++;
2016 }
2017
Tim Peters8fa5dd02001-09-12 02:18:30 +00002018 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019 Py_INCREF(self);
2020 return (PyObject*)self;
2021 }
2022 else
2023 return PyString_FromStringAndSize(s+i, j-i);
2024}
2025
2026
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002027Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002028do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2029{
2030 PyObject *sep = NULL;
2031
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002032 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002033 return NULL;
2034
2035 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002036 if (PyString_Check(sep))
2037 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002038#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002039 else if (PyUnicode_Check(sep)) {
2040 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2041 PyObject *res;
2042 if (uniself==NULL)
2043 return NULL;
2044 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2045 striptype, sep);
2046 Py_DECREF(uniself);
2047 return res;
2048 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002049#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002050 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002051#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002052 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002053#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002054 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002056 STRIPNAME(striptype));
2057 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002058 }
2059
2060 return do_strip(self, striptype);
2061}
2062
2063
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002064PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002065"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066\n\
2067Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002068whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002069If chars is given and not None, remove characters in chars instead.\n\
2070If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071
2072static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002073string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002074{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002075 if (PyTuple_GET_SIZE(args) == 0)
2076 return do_strip(self, BOTHSTRIP); /* Common case */
2077 else
2078 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079}
2080
2081
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002082PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002083"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002085Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002086If chars is given and not None, remove characters in chars instead.\n\
2087If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088
2089static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002090string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002092 if (PyTuple_GET_SIZE(args) == 0)
2093 return do_strip(self, LEFTSTRIP); /* Common case */
2094 else
2095 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096}
2097
2098
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002099PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002100"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002102Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002103If chars is given and not None, remove characters in chars instead.\n\
2104If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105
2106static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002107string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002109 if (PyTuple_GET_SIZE(args) == 0)
2110 return do_strip(self, RIGHTSTRIP); /* Common case */
2111 else
2112 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113}
2114
2115
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002116PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117"S.lower() -> string\n\
2118\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002119Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002121/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2122#ifndef _tolower
2123#define _tolower tolower
2124#endif
2125
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002127string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002129 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002130 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002131 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002133 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002134 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002136
2137 s = PyString_AS_STRING(newobj);
2138
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002139 memcpy(s, PyString_AS_STRING(self), n);
2140
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002142 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002143 if (isupper(c))
2144 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002146
Anthony Baxtera6286212006-04-11 07:42:36 +00002147 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148}
2149
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002150PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151"S.upper() -> string\n\
2152\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002153Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002155#ifndef _toupper
2156#define _toupper toupper
2157#endif
2158
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002160string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002162 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002163 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002164 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002166 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002167 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002169
2170 s = PyString_AS_STRING(newobj);
2171
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002172 memcpy(s, PyString_AS_STRING(self), n);
2173
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002175 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002176 if (islower(c))
2177 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002179
Anthony Baxtera6286212006-04-11 07:42:36 +00002180 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181}
2182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002183PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184"S.title() -> string\n\
2185\n\
2186Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002187characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188
2189static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002190string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191{
2192 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002193 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002195 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196
Anthony Baxtera6286212006-04-11 07:42:36 +00002197 newobj = PyString_FromStringAndSize(NULL, n);
2198 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002200 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201 for (i = 0; i < n; i++) {
2202 int c = Py_CHARMASK(*s++);
2203 if (islower(c)) {
2204 if (!previous_is_cased)
2205 c = toupper(c);
2206 previous_is_cased = 1;
2207 } else if (isupper(c)) {
2208 if (previous_is_cased)
2209 c = tolower(c);
2210 previous_is_cased = 1;
2211 } else
2212 previous_is_cased = 0;
2213 *s_new++ = c;
2214 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002215 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216}
2217
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002218PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219"S.capitalize() -> string\n\
2220\n\
2221Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002222capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223
2224static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002225string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226{
2227 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002228 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002229 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230
Anthony Baxtera6286212006-04-11 07:42:36 +00002231 newobj = PyString_FromStringAndSize(NULL, n);
2232 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002234 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235 if (0 < n) {
2236 int c = Py_CHARMASK(*s++);
2237 if (islower(c))
2238 *s_new = toupper(c);
2239 else
2240 *s_new = c;
2241 s_new++;
2242 }
2243 for (i = 1; i < n; i++) {
2244 int c = Py_CHARMASK(*s++);
2245 if (isupper(c))
2246 *s_new = tolower(c);
2247 else
2248 *s_new = c;
2249 s_new++;
2250 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002251 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252}
2253
2254
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002255PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256"S.count(sub[, start[, end]]) -> int\n\
2257\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002258Return the number of non-overlapping occurrences of substring sub in\n\
2259string S[start:end]. Optional arguments start and end are interpreted\n\
2260as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261
2262static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002263string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002265 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002266 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002267 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002268 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270
Guido van Rossumc6821402000-05-08 14:08:05 +00002271 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2272 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002273 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002274
Guido van Rossum4c08d552000-03-10 22:55:18 +00002275 if (PyString_Check(subobj)) {
2276 sub = PyString_AS_STRING(subobj);
2277 n = PyString_GET_SIZE(subobj);
2278 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002279#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002280 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002281 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002282 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2283 if (count == -1)
2284 return NULL;
2285 else
2286 return PyInt_FromLong((long) count);
2287 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002288#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002289 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2290 return NULL;
2291
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002292 string_adjust_indices(&i, &last, len);
2293
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294 m = last + 1 - n;
2295 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002296 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297
Fredrik Lundhaf722372006-05-25 17:55:31 +00002298#ifdef USE_FAST
2299 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2300 if (r < 0)
2301 r = 0; /* no match */
2302#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303 r = 0;
2304 while (i < m) {
Fredrik Lundha50d2012006-05-26 17:04:58 +00002305 const char *t;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306 if (!memcmp(s+i, sub, n)) {
2307 r++;
2308 i += n;
2309 } else {
2310 i++;
2311 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002312 if (i >= m)
2313 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002314 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002315 if (t == NULL)
2316 break;
2317 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002319#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002320 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321}
2322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324"S.swapcase() -> string\n\
2325\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002327converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328
2329static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002330string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331{
2332 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002333 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002334 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335
Anthony Baxtera6286212006-04-11 07:42:36 +00002336 newobj = PyString_FromStringAndSize(NULL, n);
2337 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002339 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 for (i = 0; i < n; i++) {
2341 int c = Py_CHARMASK(*s++);
2342 if (islower(c)) {
2343 *s_new = toupper(c);
2344 }
2345 else if (isupper(c)) {
2346 *s_new = tolower(c);
2347 }
2348 else
2349 *s_new = c;
2350 s_new++;
2351 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002352 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353}
2354
2355
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002356PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357"S.translate(table [,deletechars]) -> string\n\
2358\n\
2359Return a copy of the string S, where all characters occurring\n\
2360in the optional argument deletechars are removed, and the\n\
2361remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002362translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363
2364static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002365string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 register char *input, *output;
2368 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002369 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002372 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373 PyObject *result;
2374 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002375 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002377 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380
2381 if (PyString_Check(tableobj)) {
2382 table1 = PyString_AS_STRING(tableobj);
2383 tablen = PyString_GET_SIZE(tableobj);
2384 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002385#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002387 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 parameter; instead a mapping to None will cause characters
2389 to be deleted. */
2390 if (delobj != NULL) {
2391 PyErr_SetString(PyExc_TypeError,
2392 "deletions are implemented differently for unicode");
2393 return NULL;
2394 }
2395 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400
Martin v. Löwis00b61272002-12-12 20:03:19 +00002401 if (tablen != 256) {
2402 PyErr_SetString(PyExc_ValueError,
2403 "translation table must be 256 characters long");
2404 return NULL;
2405 }
2406
Guido van Rossum4c08d552000-03-10 22:55:18 +00002407 if (delobj != NULL) {
2408 if (PyString_Check(delobj)) {
2409 del_table = PyString_AS_STRING(delobj);
2410 dellen = PyString_GET_SIZE(delobj);
2411 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002412#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002413 else if (PyUnicode_Check(delobj)) {
2414 PyErr_SetString(PyExc_TypeError,
2415 "deletions are implemented differently for unicode");
2416 return NULL;
2417 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002418#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002419 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2420 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002421 }
2422 else {
2423 del_table = NULL;
2424 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425 }
2426
2427 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002428 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 result = PyString_FromStringAndSize((char *)NULL, inlen);
2430 if (result == NULL)
2431 return NULL;
2432 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002433 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434
2435 if (dellen == 0) {
2436 /* If no deletions are required, use faster code */
2437 for (i = inlen; --i >= 0; ) {
2438 c = Py_CHARMASK(*input++);
2439 if (Py_CHARMASK((*output++ = table[c])) != c)
2440 changed = 1;
2441 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002442 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443 return result;
2444 Py_DECREF(result);
2445 Py_INCREF(input_obj);
2446 return input_obj;
2447 }
2448
2449 for (i = 0; i < 256; i++)
2450 trans_table[i] = Py_CHARMASK(table[i]);
2451
2452 for (i = 0; i < dellen; i++)
2453 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2454
2455 for (i = inlen; --i >= 0; ) {
2456 c = Py_CHARMASK(*input++);
2457 if (trans_table[c] != -1)
2458 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2459 continue;
2460 changed = 1;
2461 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002462 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463 Py_DECREF(result);
2464 Py_INCREF(input_obj);
2465 return input_obj;
2466 }
2467 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002468 if (inlen > 0)
2469 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002470 return result;
2471}
2472
2473
/* Search directions understood by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* True when the `length` bytes of `pattern` occur in `target` at `offset`.
   The first and last bytes are compared before memcmp() checks the rest.
   Don't call if length < 2: the memcmp() length would be negative.
   Every argument is evaluated more than once, so pass expressions without
   side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* Pointer to the first occurrence of byte `c` within the first
   `target_len` bytes of `target`, or NULL if there is none. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2487
2488/* String ops must return a string. */
2489/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002490Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002491return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002493 if (PyString_CheckExact(self)) {
2494 Py_INCREF(self);
2495 return self;
2496 }
2497 return (PyStringObject *)PyString_FromStringAndSize(
2498 PyString_AS_STRING(self),
2499 PyString_GET_SIZE(self));
2500}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002502Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002503countchar(char *target, int target_len, char c)
2504{
2505 Py_ssize_t count=0;
2506 char *start=target;
2507 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002509 while ( (start=findchar(start, end-start, c)) != NULL ) {
2510 count++;
2511 start += 1;
2512 }
2513
2514 return count;
2515}
2516
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002517Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002518findstring(char *target, Py_ssize_t target_len,
2519 char *pattern, Py_ssize_t pattern_len,
2520 Py_ssize_t start,
2521 Py_ssize_t end,
2522 int direction)
2523{
2524 if (start < 0) {
2525 start += target_len;
2526 if (start < 0)
2527 start = 0;
2528 }
2529 if (end > target_len) {
2530 end = target_len;
2531 } else if (end < 0) {
2532 end += target_len;
2533 if (end < 0)
2534 end = 0;
2535 }
2536
2537 /* zero-length substrings always match at the first attempt */
2538 if (pattern_len == 0)
2539 return (direction > 0) ? start : end;
2540
2541 end -= pattern_len;
2542
2543 if (direction < 0) {
2544 for (; end >= start; end--)
2545 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2546 return end;
2547 } else {
2548 for (; start <= end; start++)
2549 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2550 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002551 }
2552 return -1;
2553}
2554
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002555Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002556countstring(char *target, Py_ssize_t target_len,
2557 char *pattern, Py_ssize_t pattern_len,
2558 Py_ssize_t start,
2559 Py_ssize_t end,
2560 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002561{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002562 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002564 if (start < 0) {
2565 start += target_len;
2566 if (start < 0)
2567 start = 0;
2568 }
2569 if (end > target_len) {
2570 end = target_len;
2571 } else if (end < 0) {
2572 end += target_len;
2573 if (end < 0)
2574 end = 0;
2575 }
2576
2577 /* zero-length substrings match everywhere */
2578 if (pattern_len == 0)
2579 return target_len+1;
2580
2581 end -= pattern_len;
2582
2583 if (direction < 0) {
2584 for (; end >= start; end--)
2585 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2586 count++;
2587 end -= pattern_len-1;
2588 }
2589 } else {
2590 for (; start <= end; start++)
2591 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2592 count++;
2593 start += pattern_len-1;
2594 }
2595 }
2596 return count;
2597}
2598
2599
/* Algorithms for different cases of string replacement */
2601
2602/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002603Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002604replace_interleave(PyStringObject *self,
2605 PyStringObject *to,
2606 Py_ssize_t maxcount)
2607{
2608 char *self_s, *to_s, *result_s;
2609 Py_ssize_t self_len, to_len, result_len;
2610 Py_ssize_t count, i, product;
2611 PyStringObject *result;
2612
2613 self_len = PyString_GET_SIZE(self);
2614 to_len = PyString_GET_SIZE(to);
2615
2616 /* 1 at the end plus 1 after every character */
2617 count = self_len+1;
2618 if (maxcount < count)
2619 count = maxcount;
2620
2621 /* Check for overflow */
2622 /* result_len = count * to_len + self_len; */
2623 product = count * to_len;
2624 if (product / to_len != count) {
2625 PyErr_SetString(PyExc_OverflowError,
2626 "replace string is too long");
2627 return NULL;
2628 }
2629 result_len = product + self_len;
2630 if (result_len < 0) {
2631 PyErr_SetString(PyExc_OverflowError,
2632 "replace string is too long");
2633 return NULL;
2634 }
2635
2636 if (! (result = (PyStringObject *)
2637 PyString_FromStringAndSize(NULL, result_len)) )
2638 return NULL;
2639
2640 self_s = PyString_AS_STRING(self);
2641 to_s = PyString_AS_STRING(to);
2642 to_len = PyString_GET_SIZE(to);
2643 result_s = PyString_AS_STRING(result);
2644
2645 /* TODO: special case single character, which doesn't need memcpy */
2646
2647 /* Lay the first one down (guaranteed this will occur) */
2648 memcpy(result_s, to_s, to_len);
2649 result_s += to_len;
2650 count -= 1;
2651
2652 for (i=0; i<count; i++) {
2653 *result_s++ = *self_s++;
2654 memcpy(result_s, to_s, to_len);
2655 result_s += to_len;
2656 }
2657
2658 /* Copy the rest of the original string */
2659 memcpy(result_s, self_s, self_len-i);
2660
2661 return result;
2662}
2663
2664/* Special case for deleting a single character */
2665/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002666Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002667replace_delete_single_character(PyStringObject *self,
2668 char from_c, Py_ssize_t maxcount)
2669{
2670 char *self_s, *result_s;
2671 char *start, *next, *end;
2672 Py_ssize_t self_len, result_len;
2673 Py_ssize_t count;
2674 PyStringObject *result;
2675
2676 self_len = PyString_GET_SIZE(self);
2677 self_s = PyString_AS_STRING(self);
2678
2679 count = countchar(self_s, self_len, from_c);
2680 if (count == 0) {
2681 return return_self(self);
2682 }
2683 if (count > maxcount)
2684 count = maxcount;
2685
2686 result_len = self_len - count; /* from_len == 1 */
2687 assert(result_len>=0);
2688
2689 if ( (result = (PyStringObject *)
2690 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2691 return NULL;
2692 result_s = PyString_AS_STRING(result);
2693
2694 start = self_s;
2695 end = self_s + self_len;
2696 while (count-- > 0) {
2697 next = findchar(start, end-start, from_c);
2698 if (next == NULL)
2699 break;
2700 memcpy(result_s, start, next-start);
2701 result_s += (next-start);
2702 start = next+1;
2703 }
2704 memcpy(result_s, start, end-start);
2705
2706 return result;
2707}
2708
2709/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2710
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002711Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002712replace_delete_substring(PyStringObject *self, PyStringObject *from,
2713 Py_ssize_t maxcount) {
2714 char *self_s, *from_s, *result_s;
2715 char *start, *next, *end;
2716 Py_ssize_t self_len, from_len, result_len;
2717 Py_ssize_t count, offset;
2718 PyStringObject *result;
2719
2720 self_len = PyString_GET_SIZE(self);
2721 self_s = PyString_AS_STRING(self);
2722 from_len = PyString_GET_SIZE(from);
2723 from_s = PyString_AS_STRING(from);
2724
2725 count = countstring(self_s, self_len,
2726 from_s, from_len,
2727 0, self_len, 1);
2728
2729 if (count > maxcount)
2730 count = maxcount;
2731
2732 if (count == 0) {
2733 /* no matches */
2734 return return_self(self);
2735 }
2736
2737 result_len = self_len - (count * from_len);
2738 assert (result_len>=0);
2739
2740 if ( (result = (PyStringObject *)
2741 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2742 return NULL;
2743
2744 result_s = PyString_AS_STRING(result);
2745
2746 start = self_s;
2747 end = self_s + self_len;
2748 while (count-- > 0) {
2749 offset = findstring(start, end-start,
2750 from_s, from_len,
2751 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002752 if (offset == -1)
2753 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 next = start + offset;
2755
2756 memcpy(result_s, start, next-start);
2757
2758 result_s += (next-start);
2759 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002760 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002761 memcpy(result_s, start, end-start);
2762 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002763}
2764
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002766Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002767replace_single_character_in_place(PyStringObject *self,
2768 char from_c, char to_c,
2769 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002770{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771 char *self_s, *result_s, *start, *end, *next;
2772 Py_ssize_t self_len;
2773 PyStringObject *result;
2774
2775 /* The result string will be the same size */
2776 self_s = PyString_AS_STRING(self);
2777 self_len = PyString_GET_SIZE(self);
2778
2779 next = findchar(self_s, self_len, from_c);
2780
2781 if (next == NULL) {
2782 /* No matches; return the original string */
2783 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002784 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785
2786 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002787 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 if (result == NULL)
2789 return NULL;
2790 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002791 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002792
2793 /* change everything in-place, starting with this one */
2794 start = result_s + (next-self_s);
2795 *start = to_c;
2796 start++;
2797 end = result_s + self_len;
2798
2799 while (--maxcount > 0) {
2800 next = findchar(start, end-start, from_c);
2801 if (next == NULL)
2802 break;
2803 *next = to_c;
2804 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002805 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002806
2807 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002808}
2809
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002811Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812replace_substring_in_place(PyStringObject *self,
2813 PyStringObject *from,
2814 PyStringObject *to,
2815 Py_ssize_t maxcount)
2816{
2817 char *result_s, *start, *end;
2818 char *self_s, *from_s, *to_s;
2819 Py_ssize_t self_len, from_len, offset;
2820 PyStringObject *result;
2821
2822 /* The result string will be the same size */
2823
2824 self_s = PyString_AS_STRING(self);
2825 self_len = PyString_GET_SIZE(self);
2826
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
2829 to_s = PyString_AS_STRING(to);
2830
2831 offset = findstring(self_s, self_len,
2832 from_s, from_len,
2833 0, self_len, FORWARD);
2834
2835 if (offset == -1) {
2836 /* No matches; return the original string */
2837 return return_self(self);
2838 }
2839
2840 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002841 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842 if (result == NULL)
2843 return NULL;
2844 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002845 memcpy(result_s, self_s, self_len);
2846
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847
2848 /* change everything in-place, starting with this one */
2849 start = result_s + offset;
2850 memcpy(start, to_s, from_len);
2851 start += from_len;
2852 end = result_s + self_len;
2853
2854 while ( --maxcount > 0) {
2855 offset = findstring(start, end-start,
2856 from_s, from_len,
2857 0, end-start, FORWARD);
2858 if (offset==-1)
2859 break;
2860 memcpy(start+offset, to_s, from_len);
2861 start += offset+from_len;
2862 }
2863
2864 return result;
2865}
2866
2867/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002868Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869replace_single_character(PyStringObject *self,
2870 char from_c,
2871 PyStringObject *to,
2872 Py_ssize_t maxcount)
2873{
2874 char *self_s, *to_s, *result_s;
2875 char *start, *next, *end;
2876 Py_ssize_t self_len, to_len, result_len;
2877 Py_ssize_t count, product;
2878 PyStringObject *result;
2879
2880 self_s = PyString_AS_STRING(self);
2881 self_len = PyString_GET_SIZE(self);
2882
2883 count = countchar(self_s, self_len, from_c);
2884 if (count > maxcount)
2885 count = maxcount;
2886
2887 if (count == 0) {
2888 /* no matches, return unchanged */
2889 return return_self(self);
2890 }
2891
2892 to_s = PyString_AS_STRING(to);
2893 to_len = PyString_GET_SIZE(to);
2894
2895 /* use the difference between current and new, hence the "-1" */
2896 /* result_len = self_len + count * (to_len-1) */
2897 product = count * (to_len-1);
2898 if (product / (to_len-1) != count) {
2899 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2900 return NULL;
2901 }
2902 result_len = self_len + product;
2903 if (result_len < 0) {
2904 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2905 return NULL;
2906 }
2907
2908 if ( (result = (PyStringObject *)
2909 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2910 return NULL;
2911 result_s = PyString_AS_STRING(result);
2912
2913 start = self_s;
2914 end = self_s + self_len;
2915 while (count-- > 0) {
2916 next = findchar(start, end-start, from_c);
2917 if (next == NULL)
2918 break;
2919
2920 if (next == start) {
2921 /* replace with the 'to' */
2922 memcpy(result_s, to_s, to_len);
2923 result_s += to_len;
2924 start += 1;
2925 } else {
2926 /* copy the unchanged old then the 'to' */
2927 memcpy(result_s, start, next-start);
2928 result_s += (next-start);
2929 memcpy(result_s, to_s, to_len);
2930 result_s += to_len;
2931 start = next+1;
2932 }
2933 }
2934 /* Copy the remainder of the remaining string */
2935 memcpy(result_s, start, end-start);
2936
2937 return result;
2938}
2939
2940/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002941Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002942replace_substring(PyStringObject *self,
2943 PyStringObject *from,
2944 PyStringObject *to,
2945 Py_ssize_t maxcount) {
2946 char *self_s, *from_s, *to_s, *result_s;
2947 char *start, *next, *end;
2948 Py_ssize_t self_len, from_len, to_len, result_len;
2949 Py_ssize_t count, offset, product;
2950 PyStringObject *result;
2951
2952 self_s = PyString_AS_STRING(self);
2953 self_len = PyString_GET_SIZE(self);
2954 from_s = PyString_AS_STRING(from);
2955 from_len = PyString_GET_SIZE(from);
2956
2957 count = countstring(self_s, self_len,
2958 from_s, from_len,
2959 0, self_len, FORWARD);
2960 if (count > maxcount)
2961 count = maxcount;
2962
2963 if (count == 0) {
2964 /* no matches, return unchanged */
2965 return return_self(self);
2966 }
2967
2968 to_s = PyString_AS_STRING(to);
2969 to_len = PyString_GET_SIZE(to);
2970
2971 /* Check for overflow */
2972 /* result_len = self_len + count * (to_len-from_len) */
2973 product = count * (to_len-from_len);
2974 if (product / (to_len-from_len) != count) {
2975 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2976 return NULL;
2977 }
2978 result_len = self_len + product;
2979 if (result_len < 0) {
2980 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2981 return NULL;
2982 }
2983
2984 if ( (result = (PyStringObject *)
2985 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2986 return NULL;
2987 result_s = PyString_AS_STRING(result);
2988
2989 start = self_s;
2990 end = self_s + self_len;
2991 while (count-- > 0) {
2992 offset = findstring(start, end-start,
2993 from_s, from_len,
2994 0, end-start, FORWARD);
2995 if (offset == -1)
2996 break;
2997 next = start+offset;
2998 if (next == start) {
2999 /* replace with the 'to' */
3000 memcpy(result_s, to_s, to_len);
3001 result_s += to_len;
3002 start += from_len;
3003 } else {
3004 /* copy the unchanged old then the 'to' */
3005 memcpy(result_s, start, next-start);
3006 result_s += (next-start);
3007 memcpy(result_s, to_s, to_len);
3008 result_s += to_len;
3009 start = next+from_len;
3010 }
3011 }
3012 /* Copy the remainder of the remaining string */
3013 memcpy(result_s, start, end-start);
3014
3015 return result;
3016}
3017
3018
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003019Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003020replace(PyStringObject *self,
3021 PyStringObject *from,
3022 PyStringObject *to,
3023 Py_ssize_t maxcount)
3024{
3025 Py_ssize_t from_len, to_len;
3026
3027 if (maxcount < 0) {
3028 maxcount = PY_SSIZE_T_MAX;
3029 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3030 /* nothing to do; return the original string */
3031 return return_self(self);
3032 }
3033
3034 from_len = PyString_GET_SIZE(from);
3035 to_len = PyString_GET_SIZE(to);
3036
3037 if (maxcount == 0 ||
3038 (from_len == 0 && to_len == 0)) {
3039 /* nothing to do; return the original string */
3040 return return_self(self);
3041 }
3042
3043 /* Handle zero-length special cases */
3044
3045 if (from_len == 0) {
3046 /* insert the 'to' string everywhere. */
3047 /* >>> "Python".replace("", ".") */
3048 /* '.P.y.t.h.o.n.' */
3049 return replace_interleave(self, to, maxcount);
3050 }
3051
3052 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3053 /* point for an empty self string to generate a non-empty string */
3054 /* Special case so the remaining code always gets a non-empty string */
3055 if (PyString_GET_SIZE(self) == 0) {
3056 return return_self(self);
3057 }
3058
3059 if (to_len == 0) {
3060 /* delete all occurances of 'from' string */
3061 if (from_len == 1) {
3062 return replace_delete_single_character(
3063 self, PyString_AS_STRING(from)[0], maxcount);
3064 } else {
3065 return replace_delete_substring(self, from, maxcount);
3066 }
3067 }
3068
3069 /* Handle special case where both strings have the same length */
3070
3071 if (from_len == to_len) {
3072 if (from_len == 1) {
3073 return replace_single_character_in_place(
3074 self,
3075 PyString_AS_STRING(from)[0],
3076 PyString_AS_STRING(to)[0],
3077 maxcount);
3078 } else {
3079 return replace_substring_in_place(
3080 self, from, to, maxcount);
3081 }
3082 }
3083
3084 /* Otherwise use the more generic algorithms */
3085 if (from_len == 1) {
3086 return replace_single_character(self, PyString_AS_STRING(from)[0],
3087 to, maxcount);
3088 } else {
3089 /* len('from')>=2, len('to')>=1 */
3090 return replace_substring(self, from, to, maxcount);
3091 }
3092}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003093
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003094PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003095"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003096\n\
3097Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003098old replaced by new. If the optional argument count is\n\
3099given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003100
3101static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003102string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003103{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003104 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003105 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003106 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003107 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003108
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003109 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003110 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003112 if (PyString_Check(from)) {
3113 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003115#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003116 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003117 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003118 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003119#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003120 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003121 return NULL;
3122
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003123 if (PyString_Check(to)) {
3124 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003126#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003127 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003128 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003129 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003130#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003131 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003132 return NULL;
3133
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003134 return (PyObject *)replace((PyStringObject *) self,
3135 (PyStringObject *) from,
3136 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137}
3138
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003139/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003141PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003142"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003143\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003144Return True if S starts with the specified prefix, False otherwise.\n\
3145With optional start, test S beginning at that position.\n\
3146With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147
3148static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003149string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003152 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003153 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003154 Py_ssize_t plen;
3155 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003156 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158
Guido van Rossumc6821402000-05-08 14:08:05 +00003159 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3160 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 return NULL;
3162 if (PyString_Check(subobj)) {
3163 prefix = PyString_AS_STRING(subobj);
3164 plen = PyString_GET_SIZE(subobj);
3165 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003166#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003167 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003168 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003169 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003170 subobj, start, end, -1);
3171 if (rc == -1)
3172 return NULL;
3173 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003174 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003175 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003176#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003177 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003178 return NULL;
3179
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003180 string_adjust_indices(&start, &end, len);
3181
3182 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003183 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003184
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003185 if (end-start >= plen)
3186 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3187 else
3188 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003189}
3190
3191
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003192PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003193"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003194\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003195Return True if S ends with the specified suffix, False otherwise.\n\
3196With optional start, test S beginning at that position.\n\
3197With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003198
3199static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003200string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003201{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003202 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003203 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003204 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003205 Py_ssize_t slen;
3206 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003207 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003209
Guido van Rossumc6821402000-05-08 14:08:05 +00003210 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3211 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003212 return NULL;
3213 if (PyString_Check(subobj)) {
3214 suffix = PyString_AS_STRING(subobj);
3215 slen = PyString_GET_SIZE(subobj);
3216 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003217#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003218 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003219 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003220 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003221 subobj, start, end, +1);
3222 if (rc == -1)
3223 return NULL;
3224 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003225 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003226 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003227#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003228 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003229 return NULL;
3230
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003231 string_adjust_indices(&start, &end, len);
3232
3233 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003234 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003235
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003236 if (end-slen > start)
3237 start = end - slen;
3238 if (end-start >= slen)
3239 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3240 else
3241 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003242}
3243
3244
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003245PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003246"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003247\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003248Encodes S using the codec registered for encoding. encoding defaults\n\
3249to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003250handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003251a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3252'xmlcharrefreplace' as well as any other name registered with\n\
3253codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003254
3255static PyObject *
3256string_encode(PyStringObject *self, PyObject *args)
3257{
3258 char *encoding = NULL;
3259 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003260 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003261
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003262 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3263 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003264 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003265 if (v == NULL)
3266 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003267 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3268 PyErr_Format(PyExc_TypeError,
3269 "encoder did not return a string/unicode object "
3270 "(type=%.400s)",
3271 v->ob_type->tp_name);
3272 Py_DECREF(v);
3273 return NULL;
3274 }
3275 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003276
3277 onError:
3278 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003279}
3280
3281
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003282PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003283"S.decode([encoding[,errors]]) -> object\n\
3284\n\
3285Decodes S using the codec registered for encoding. encoding defaults\n\
3286to the default encoding. errors may be given to set a different error\n\
3287handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003288a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3289as well as any other name registerd with codecs.register_error that is\n\
3290able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003291
3292static PyObject *
3293string_decode(PyStringObject *self, PyObject *args)
3294{
3295 char *encoding = NULL;
3296 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003297 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003298
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003299 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3300 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003301 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003302 if (v == NULL)
3303 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003304 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3305 PyErr_Format(PyExc_TypeError,
3306 "decoder did not return a string/unicode object "
3307 "(type=%.400s)",
3308 v->ob_type->tp_name);
3309 Py_DECREF(v);
3310 return NULL;
3311 }
3312 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003313
3314 onError:
3315 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003316}
3317
3318
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003319PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003320"S.expandtabs([tabsize]) -> string\n\
3321\n\
3322Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003323If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003324
3325static PyObject*
3326string_expandtabs(PyStringObject *self, PyObject *args)
3327{
3328 const char *e, *p;
3329 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003330 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003331 PyObject *u;
3332 int tabsize = 8;
3333
3334 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3335 return NULL;
3336
Thomas Wouters7e474022000-07-16 12:04:32 +00003337 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003338 i = j = 0;
3339 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3340 for (p = PyString_AS_STRING(self); p < e; p++)
3341 if (*p == '\t') {
3342 if (tabsize > 0)
3343 j += tabsize - (j % tabsize);
3344 }
3345 else {
3346 j++;
3347 if (*p == '\n' || *p == '\r') {
3348 i += j;
3349 j = 0;
3350 }
3351 }
3352
3353 /* Second pass: create output string and fill it */
3354 u = PyString_FromStringAndSize(NULL, i + j);
3355 if (!u)
3356 return NULL;
3357
3358 j = 0;
3359 q = PyString_AS_STRING(u);
3360
3361 for (p = PyString_AS_STRING(self); p < e; p++)
3362 if (*p == '\t') {
3363 if (tabsize > 0) {
3364 i = tabsize - (j % tabsize);
3365 j += i;
3366 while (i--)
3367 *q++ = ' ';
3368 }
3369 }
3370 else {
3371 j++;
3372 *q++ = *p;
3373 if (*p == '\n' || *p == '\r')
3374 j = 0;
3375 }
3376
3377 return u;
3378}
3379
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003380Py_LOCAL(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003381pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382{
3383 PyObject *u;
3384
3385 if (left < 0)
3386 left = 0;
3387 if (right < 0)
3388 right = 0;
3389
Tim Peters8fa5dd02001-09-12 02:18:30 +00003390 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003391 Py_INCREF(self);
3392 return (PyObject *)self;
3393 }
3394
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003395 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003396 left + PyString_GET_SIZE(self) + right);
3397 if (u) {
3398 if (left)
3399 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003400 memcpy(PyString_AS_STRING(u) + left,
3401 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003402 PyString_GET_SIZE(self));
3403 if (right)
3404 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3405 fill, right);
3406 }
3407
3408 return u;
3409}
3410
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003411PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003412"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003413"\n"
3414"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003415"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003416
3417static PyObject *
3418string_ljust(PyStringObject *self, PyObject *args)
3419{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003420 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003421 char fillchar = ' ';
3422
Thomas Wouters4abb3662006-04-19 14:50:15 +00003423 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003424 return NULL;
3425
Tim Peters8fa5dd02001-09-12 02:18:30 +00003426 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003427 Py_INCREF(self);
3428 return (PyObject*) self;
3429 }
3430
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003431 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003432}
3433
3434
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003435PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003436"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003437"\n"
3438"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003439"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440
3441static PyObject *
3442string_rjust(PyStringObject *self, PyObject *args)
3443{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003444 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003445 char fillchar = ' ';
3446
Thomas Wouters4abb3662006-04-19 14:50:15 +00003447 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003448 return NULL;
3449
Tim Peters8fa5dd02001-09-12 02:18:30 +00003450 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003451 Py_INCREF(self);
3452 return (PyObject*) self;
3453 }
3454
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003455 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456}
3457
3458
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003459PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003460"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003461"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003462"Return S centered in a string of length width. Padding is\n"
3463"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003464
3465static PyObject *
3466string_center(PyStringObject *self, PyObject *args)
3467{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003468 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003469 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003470 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003471
Thomas Wouters4abb3662006-04-19 14:50:15 +00003472 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003473 return NULL;
3474
Tim Peters8fa5dd02001-09-12 02:18:30 +00003475 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003476 Py_INCREF(self);
3477 return (PyObject*) self;
3478 }
3479
3480 marg = width - PyString_GET_SIZE(self);
3481 left = marg / 2 + (marg & width & 1);
3482
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003483 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003484}
3485
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003486PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003487"S.zfill(width) -> string\n"
3488"\n"
3489"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003490"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003491
3492static PyObject *
3493string_zfill(PyStringObject *self, PyObject *args)
3494{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003495 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003496 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003497 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003498 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003499
Thomas Wouters4abb3662006-04-19 14:50:15 +00003500 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003501 return NULL;
3502
3503 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003504 if (PyString_CheckExact(self)) {
3505 Py_INCREF(self);
3506 return (PyObject*) self;
3507 }
3508 else
3509 return PyString_FromStringAndSize(
3510 PyString_AS_STRING(self),
3511 PyString_GET_SIZE(self)
3512 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003513 }
3514
3515 fill = width - PyString_GET_SIZE(self);
3516
3517 s = pad(self, fill, 0, '0');
3518
3519 if (s == NULL)
3520 return NULL;
3521
3522 p = PyString_AS_STRING(s);
3523 if (p[fill] == '+' || p[fill] == '-') {
3524 /* move sign to beginning of string */
3525 p[0] = p[fill];
3526 p[fill] = '0';
3527 }
3528
3529 return (PyObject*) s;
3530}
3531
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003532PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003533"S.isspace() -> bool\n\
3534\n\
3535Return True if all characters in S are whitespace\n\
3536and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537
3538static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003539string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003540{
Fred Drakeba096332000-07-09 07:04:36 +00003541 register const unsigned char *p
3542 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003543 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545 /* Shortcut for single character strings */
3546 if (PyString_GET_SIZE(self) == 1 &&
3547 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003548 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003550 /* Special case for empty strings */
3551 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003552 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003553
Guido van Rossum4c08d552000-03-10 22:55:18 +00003554 e = p + PyString_GET_SIZE(self);
3555 for (; p < e; p++) {
3556 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003557 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003559 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003560}
3561
3562
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003563PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003564"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003566Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003567and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003568
3569static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003570string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571{
Fred Drakeba096332000-07-09 07:04:36 +00003572 register const unsigned char *p
3573 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574 register const unsigned char *e;
3575
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576 /* Shortcut for single character strings */
3577 if (PyString_GET_SIZE(self) == 1 &&
3578 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003579 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003580
3581 /* Special case for empty strings */
3582 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003583 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584
3585 e = p + PyString_GET_SIZE(self);
3586 for (; p < e; p++) {
3587 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003590 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003591}
3592
3593
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003594PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003595"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003596\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003597Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003598and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003599
3600static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003601string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003602{
Fred Drakeba096332000-07-09 07:04:36 +00003603 register const unsigned char *p
3604 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605 register const unsigned char *e;
3606
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003607 /* Shortcut for single character strings */
3608 if (PyString_GET_SIZE(self) == 1 &&
3609 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003611
3612 /* Special case for empty strings */
3613 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003615
3616 e = p + PyString_GET_SIZE(self);
3617 for (; p < e; p++) {
3618 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003620 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003622}
3623
3624
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003625PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003626"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003628Return True if all characters in S are digits\n\
3629and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630
3631static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003632string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633{
Fred Drakeba096332000-07-09 07:04:36 +00003634 register const unsigned char *p
3635 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003636 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003637
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638 /* Shortcut for single character strings */
3639 if (PyString_GET_SIZE(self) == 1 &&
3640 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003643 /* Special case for empty strings */
3644 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003645 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003646
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647 e = p + PyString_GET_SIZE(self);
3648 for (; p < e; p++) {
3649 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003652 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653}
3654
3655
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003656PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003659Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003660at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661
3662static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003663string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664{
Fred Drakeba096332000-07-09 07:04:36 +00003665 register const unsigned char *p
3666 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003667 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 int cased;
3669
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670 /* Shortcut for single character strings */
3671 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003674 /* Special case for empty strings */
3675 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003677
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678 e = p + PyString_GET_SIZE(self);
3679 cased = 0;
3680 for (; p < e; p++) {
3681 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003682 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683 else if (!cased && islower(*p))
3684 cased = 1;
3685 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003686 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687}
3688
3689
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003690PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003691"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003693Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003694at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695
3696static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003697string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698{
Fred Drakeba096332000-07-09 07:04:36 +00003699 register const unsigned char *p
3700 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003701 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 int cased;
3703
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704 /* Shortcut for single character strings */
3705 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003708 /* Special case for empty strings */
3709 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003711
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 e = p + PyString_GET_SIZE(self);
3713 cased = 0;
3714 for (; p < e; p++) {
3715 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717 else if (!cased && isupper(*p))
3718 cased = 1;
3719 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721}
3722
3723
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003724PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003725"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003727Return True if S is a titlecased string and there is at least one\n\
3728character in S, i.e. uppercase characters may only follow uncased\n\
3729characters and lowercase characters only cased ones. Return False\n\
3730otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731
3732static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003733string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734{
Fred Drakeba096332000-07-09 07:04:36 +00003735 register const unsigned char *p
3736 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003737 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738 int cased, previous_is_cased;
3739
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740 /* Shortcut for single character strings */
3741 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003742 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003744 /* Special case for empty strings */
3745 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003746 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003747
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 e = p + PyString_GET_SIZE(self);
3749 cased = 0;
3750 previous_is_cased = 0;
3751 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003752 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753
3754 if (isupper(ch)) {
3755 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003756 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757 previous_is_cased = 1;
3758 cased = 1;
3759 }
3760 else if (islower(ch)) {
3761 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003762 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763 previous_is_cased = 1;
3764 cased = 1;
3765 }
3766 else
3767 previous_is_cased = 0;
3768 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003769 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770}
3771
3772
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003773PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003774"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003775\n\
3776Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003777Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003778is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779
Guido van Rossum4c08d552000-03-10 22:55:18 +00003780static PyObject*
3781string_splitlines(PyStringObject *self, PyObject *args)
3782{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003783 register Py_ssize_t i;
3784 register Py_ssize_t j;
3785 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003786 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787 PyObject *list;
3788 PyObject *str;
3789 char *data;
3790
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003791 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792 return NULL;
3793
3794 data = PyString_AS_STRING(self);
3795 len = PyString_GET_SIZE(self);
3796
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797 list = PyList_New(0);
3798 if (!list)
3799 goto onError;
3800
3801 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003802 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003803
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804 /* Find a line and append it */
3805 while (i < len && data[i] != '\n' && data[i] != '\r')
3806 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003807
3808 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003809 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 if (i < len) {
3811 if (data[i] == '\r' && i + 1 < len &&
3812 data[i+1] == '\n')
3813 i += 2;
3814 else
3815 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003816 if (keepends)
3817 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003818 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003819 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820 j = i;
3821 }
3822 if (j < len) {
3823 SPLIT_APPEND(data, j, len);
3824 }
3825
3826 return list;
3827
3828 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003829 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830 return NULL;
3831}
3832
3833#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003834#undef SPLIT_ADD
3835#undef MAX_PREALLOC
3836#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003837
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003838static PyObject *
3839string_getnewargs(PyStringObject *v)
3840{
3841 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3842}
3843
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003844
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003845static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003846string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847 /* Counterparts of the obsolete stropmodule functions; except
3848 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003849 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3850 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003851 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003852 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3853 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003854 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3855 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3856 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3857 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3858 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3859 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3860 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003861 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3862 capitalize__doc__},
3863 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3864 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3865 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003866 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003867 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3868 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3869 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3870 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3871 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3872 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3873 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3874 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3875 startswith__doc__},
3876 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3877 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3878 swapcase__doc__},
3879 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3880 translate__doc__},
3881 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3882 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3883 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3884 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3885 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3886 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3887 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3888 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3889 expandtabs__doc__},
3890 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3891 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003892 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003893 {NULL, NULL} /* sentinel */
3894};
3895
Jeremy Hylton938ace62002-07-17 16:30:39 +00003896static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003897str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3898
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003899static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003900string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003901{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003902 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003903 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003904
Guido van Rossumae960af2001-08-30 03:11:59 +00003905 if (type != &PyString_Type)
3906 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003907 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3908 return NULL;
3909 if (x == NULL)
3910 return PyString_FromString("");
3911 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003912}
3913
Guido van Rossumae960af2001-08-30 03:11:59 +00003914static PyObject *
3915str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3916{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003917 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003918 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003919
3920 assert(PyType_IsSubtype(type, &PyString_Type));
3921 tmp = string_new(&PyString_Type, args, kwds);
3922 if (tmp == NULL)
3923 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003924 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003925 n = PyString_GET_SIZE(tmp);
3926 pnew = type->tp_alloc(type, n);
3927 if (pnew != NULL) {
3928 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003929 ((PyStringObject *)pnew)->ob_shash =
3930 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003931 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003932 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003933 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003934 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003935}
3936
Guido van Rossumcacfc072002-05-24 19:01:59 +00003937static PyObject *
3938basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3939{
3940 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003941 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003942 return NULL;
3943}
3944
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003945static PyObject *
3946string_mod(PyObject *v, PyObject *w)
3947{
3948 if (!PyString_Check(v)) {
3949 Py_INCREF(Py_NotImplemented);
3950 return Py_NotImplemented;
3951 }
3952 return PyString_Format(v, w);
3953}
3954
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003955PyDoc_STRVAR(basestring_doc,
3956"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003957
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003958static PyNumberMethods string_as_number = {
3959 0, /*nb_add*/
3960 0, /*nb_subtract*/
3961 0, /*nb_multiply*/
3962 0, /*nb_divide*/
3963 string_mod, /*nb_remainder*/
3964};
3965
3966
Guido van Rossumcacfc072002-05-24 19:01:59 +00003967PyTypeObject PyBaseString_Type = {
3968 PyObject_HEAD_INIT(&PyType_Type)
3969 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003970 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003971 0,
3972 0,
3973 0, /* tp_dealloc */
3974 0, /* tp_print */
3975 0, /* tp_getattr */
3976 0, /* tp_setattr */
3977 0, /* tp_compare */
3978 0, /* tp_repr */
3979 0, /* tp_as_number */
3980 0, /* tp_as_sequence */
3981 0, /* tp_as_mapping */
3982 0, /* tp_hash */
3983 0, /* tp_call */
3984 0, /* tp_str */
3985 0, /* tp_getattro */
3986 0, /* tp_setattro */
3987 0, /* tp_as_buffer */
3988 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3989 basestring_doc, /* tp_doc */
3990 0, /* tp_traverse */
3991 0, /* tp_clear */
3992 0, /* tp_richcompare */
3993 0, /* tp_weaklistoffset */
3994 0, /* tp_iter */
3995 0, /* tp_iternext */
3996 0, /* tp_methods */
3997 0, /* tp_members */
3998 0, /* tp_getset */
3999 &PyBaseObject_Type, /* tp_base */
4000 0, /* tp_dict */
4001 0, /* tp_descr_get */
4002 0, /* tp_descr_set */
4003 0, /* tp_dictoffset */
4004 0, /* tp_init */
4005 0, /* tp_alloc */
4006 basestring_new, /* tp_new */
4007 0, /* tp_free */
4008};
4009
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004010PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011"str(object) -> string\n\
4012\n\
4013Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004014If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004015
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004016PyTypeObject PyString_Type = {
4017 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004018 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004019 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004020 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004021 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004022 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004023 (printfunc)string_print, /* tp_print */
4024 0, /* tp_getattr */
4025 0, /* tp_setattr */
4026 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004027 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004028 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004029 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004030 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004031 (hashfunc)string_hash, /* tp_hash */
4032 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004033 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004034 PyObject_GenericGetAttr, /* tp_getattro */
4035 0, /* tp_setattro */
4036 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004037 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004038 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004039 string_doc, /* tp_doc */
4040 0, /* tp_traverse */
4041 0, /* tp_clear */
4042 (richcmpfunc)string_richcompare, /* tp_richcompare */
4043 0, /* tp_weaklistoffset */
4044 0, /* tp_iter */
4045 0, /* tp_iternext */
4046 string_methods, /* tp_methods */
4047 0, /* tp_members */
4048 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004049 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004050 0, /* tp_dict */
4051 0, /* tp_descr_get */
4052 0, /* tp_descr_set */
4053 0, /* tp_dictoffset */
4054 0, /* tp_init */
4055 0, /* tp_alloc */
4056 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004057 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004058};
4059
4060void
Fred Drakeba096332000-07-09 07:04:36 +00004061PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004062{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004063 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004064 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004065 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004066 if (w == NULL || !PyString_Check(*pv)) {
4067 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004068 *pv = NULL;
4069 return;
4070 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 v = string_concat((PyStringObject *) *pv, w);
4072 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004073 *pv = v;
4074}
4075
Guido van Rossum013142a1994-08-30 08:19:36 +00004076void
Fred Drakeba096332000-07-09 07:04:36 +00004077PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004078{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 PyString_Concat(pv, w);
4080 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004081}
4082
4083
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004084/* The following function breaks the notion that strings are immutable:
4085 it changes the size of a string. We get away with this only if there
4086 is only one module referencing the object. You can also think of it
4087 as creating a new string object and destroying the old one, only
4088 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004089 already be known to some other part of the code...
4090 Note that if there's not enough memory to resize the string, the original
4091 string object at *pv is deallocated, *pv is set to NULL, an "out of
4092 memory" exception is set, and -1 is returned. Else (on success) 0 is
4093 returned, and the value in *pv may or may not be the same as on input.
4094 As always, an extra byte is allocated for a trailing \0 byte (newsize
4095 does *not* include that), and a trailing \0 byte is stored.
4096*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004097
4098int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004099_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004100{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004101 register PyObject *v;
4102 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004103 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004104 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4105 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004106 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004107 Py_DECREF(v);
4108 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004109 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004110 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004111 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004112 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004113 _Py_ForgetReference(v);
4114 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004115 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004116 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004117 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004118 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004119 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004120 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004121 _Py_NewReference(*pv);
4122 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004123 sv->ob_size = newsize;
4124 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004125 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004126 return 0;
4127}
Guido van Rossume5372401993-03-16 12:15:04 +00004128
4129/* Helpers for formatstring */
4130
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004131Py_LOCAL(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004132getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004133{
Thomas Wouters977485d2006-02-16 15:59:12 +00004134 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004135 if (argidx < arglen) {
4136 (*p_argidx)++;
4137 if (arglen < 0)
4138 return args;
4139 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004140 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004141 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004142 PyErr_SetString(PyExc_TypeError,
4143 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004144 return NULL;
4145}
4146
/* Format flag bits, one per flag character:
 *
 *   character   flag
 *   ---------   -------
 *      '-'      F_LJUST
 *      '+'      F_SIGN
 *      ' '      F_BLANK
 *      '#'      F_ALT
 *      '0'      F_ZERO
 */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */

4159
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004160Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004161formatfloat(char *buf, size_t buflen, int flags,
4162 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004163{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004164 /* fmt = '%#.' + `prec` + `type`
4165 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004166 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004167 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004168 x = PyFloat_AsDouble(v);
4169 if (x == -1.0 && PyErr_Occurred()) {
4170 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004171 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004172 }
Guido van Rossume5372401993-03-16 12:15:04 +00004173 if (prec < 0)
4174 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004175 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4176 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004177 /* Worst case length calc to ensure no buffer overrun:
4178
4179 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004180 fmt = %#.<prec>g
4181 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004182 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004183 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004184
4185 'f' formats:
4186 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4187 len = 1 + 50 + 1 + prec = 52 + prec
4188
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004189 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004190 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004191
4192 */
4193 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4194 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004195 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004196 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004197 return -1;
4198 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004199 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4200 (flags&F_ALT) ? "#" : "",
4201 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004202 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004203 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004204}
4205
Tim Peters38fd5b62000-09-21 05:43:11 +00004206/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4207 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4208 * Python's regular ints.
4209 * Return value: a new PyString*, or NULL if error.
4210 * . *pbuf is set to point into it,
4211 * *plen set to the # of chars following that.
4212 * Caller must decref it when done using pbuf.
4213 * The string starting at *pbuf is of the form
4214 * "-"? ("0x" | "0X")? digit+
4215 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004216 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004217 * There will be at least prec digits, zero-filled on the left if
4218 * necessary to get that many.
4219 * val object to be converted
4220 * flags bitmask of format flags; only F_ALT is looked at
4221 * prec minimum number of digits; 0-fill on left if needed
4222 * type a character in [duoxX]; u acts the same as d
4223 *
4224 * CAUTION: o, x and X conversions on regular ints can never
4225 * produce a '-' sign, but can for Python's unbounded ints.
4226 */
4227PyObject*
4228_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4229 char **pbuf, int *plen)
4230{
4231 PyObject *result = NULL;
4232 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004233 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004234 int sign; /* 1 if '-', else 0 */
4235 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004236 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004237 int numdigits; /* len == numnondigits + numdigits */
4238 int numnondigits = 0;
4239
4240 switch (type) {
4241 case 'd':
4242 case 'u':
4243 result = val->ob_type->tp_str(val);
4244 break;
4245 case 'o':
4246 result = val->ob_type->tp_as_number->nb_oct(val);
4247 break;
4248 case 'x':
4249 case 'X':
4250 numnondigits = 2;
4251 result = val->ob_type->tp_as_number->nb_hex(val);
4252 break;
4253 default:
4254 assert(!"'type' not in [duoxX]");
4255 }
4256 if (!result)
4257 return NULL;
4258
4259 /* To modify the string in-place, there can only be one reference. */
4260 if (result->ob_refcnt != 1) {
4261 PyErr_BadInternalCall();
4262 return NULL;
4263 }
4264 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004265 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004266 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004267 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4268 return NULL;
4269 }
4270 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004271 if (buf[len-1] == 'L') {
4272 --len;
4273 buf[len] = '\0';
4274 }
4275 sign = buf[0] == '-';
4276 numnondigits += sign;
4277 numdigits = len - numnondigits;
4278 assert(numdigits > 0);
4279
Tim Petersfff53252001-04-12 18:38:48 +00004280 /* Get rid of base marker unless F_ALT */
4281 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004282 /* Need to skip 0x, 0X or 0. */
4283 int skipped = 0;
4284 switch (type) {
4285 case 'o':
4286 assert(buf[sign] == '0');
4287 /* If 0 is only digit, leave it alone. */
4288 if (numdigits > 1) {
4289 skipped = 1;
4290 --numdigits;
4291 }
4292 break;
4293 case 'x':
4294 case 'X':
4295 assert(buf[sign] == '0');
4296 assert(buf[sign + 1] == 'x');
4297 skipped = 2;
4298 numnondigits -= 2;
4299 break;
4300 }
4301 if (skipped) {
4302 buf += skipped;
4303 len -= skipped;
4304 if (sign)
4305 buf[0] = '-';
4306 }
4307 assert(len == numnondigits + numdigits);
4308 assert(numdigits > 0);
4309 }
4310
4311 /* Fill with leading zeroes to meet minimum width. */
4312 if (prec > numdigits) {
4313 PyObject *r1 = PyString_FromStringAndSize(NULL,
4314 numnondigits + prec);
4315 char *b1;
4316 if (!r1) {
4317 Py_DECREF(result);
4318 return NULL;
4319 }
4320 b1 = PyString_AS_STRING(r1);
4321 for (i = 0; i < numnondigits; ++i)
4322 *b1++ = *buf++;
4323 for (i = 0; i < prec - numdigits; i++)
4324 *b1++ = '0';
4325 for (i = 0; i < numdigits; i++)
4326 *b1++ = *buf++;
4327 *b1 = '\0';
4328 Py_DECREF(result);
4329 result = r1;
4330 buf = PyString_AS_STRING(result);
4331 len = numnondigits + prec;
4332 }
4333
4334 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004335 if (type == 'X') {
4336 /* Need to convert all lower case letters to upper case.
4337 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004338 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004339 if (buf[i] >= 'a' && buf[i] <= 'x')
4340 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004341 }
4342 *pbuf = buf;
4343 *plen = len;
4344 return result;
4345}
4346
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004347Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004348formatint(char *buf, size_t buflen, int flags,
4349 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004350{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004351 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004352 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4353 + 1 + 1 = 24 */
4354 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004355 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004356 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004357
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004358 x = PyInt_AsLong(v);
4359 if (x == -1 && PyErr_Occurred()) {
4360 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004361 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004362 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004363 if (x < 0 && type == 'u') {
4364 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004365 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004366 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4367 sign = "-";
4368 else
4369 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004370 if (prec < 0)
4371 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004372
4373 if ((flags & F_ALT) &&
4374 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004375 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004376 * of issues that cause pain:
4377 * - when 0 is being converted, the C standard leaves off
4378 * the '0x' or '0X', which is inconsistent with other
4379 * %#x/%#X conversions and inconsistent with Python's
4380 * hex() function
4381 * - there are platforms that violate the standard and
4382 * convert 0 with the '0x' or '0X'
4383 * (Metrowerks, Compaq Tru64)
4384 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004385 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004386 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004387 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004388 * We can achieve the desired consistency by inserting our
4389 * own '0x' or '0X' prefix, and substituting %x/%X in place
4390 * of %#x/%#X.
4391 *
4392 * Note that this is the same approach as used in
4393 * formatint() in unicodeobject.c
4394 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004395 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4396 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004397 }
4398 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004399 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4400 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004401 prec, type);
4402 }
4403
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004404 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4405 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004406 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004407 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004408 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004409 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004410 return -1;
4411 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004412 if (sign[0])
4413 PyOS_snprintf(buf, buflen, fmt, -x);
4414 else
4415 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004416 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004417}
4418
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004419Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004420formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004421{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004422 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004423 if (PyString_Check(v)) {
4424 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004425 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004426 }
4427 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004428 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004429 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004430 }
4431 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004432 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004433}
4434
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand in any expression (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004444
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004445PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004446PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004447{
4448 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004449 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004450 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004451 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004452 PyObject *result, *orig_args;
4453#ifdef Py_USING_UNICODE
4454 PyObject *v, *w;
4455#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004456 PyObject *dict = NULL;
4457 if (format == NULL || !PyString_Check(format) || args == NULL) {
4458 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004459 return NULL;
4460 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004461 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004462 fmt = PyString_AS_STRING(format);
4463 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004464 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004465 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004466 if (result == NULL)
4467 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004468 res = PyString_AsString(result);
4469 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004470 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004471 argidx = 0;
4472 }
4473 else {
4474 arglen = -1;
4475 argidx = -2;
4476 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004477 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4478 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004479 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004480 while (--fmtcnt >= 0) {
4481 if (*fmt != '%') {
4482 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004483 rescnt = fmtcnt + 100;
4484 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004485 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004486 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004487 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004488 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004489 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004490 }
4491 *res++ = *fmt++;
4492 }
4493 else {
4494 /* Got a format specifier */
4495 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004496 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004497 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004498 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004499 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004500 PyObject *v = NULL;
4501 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004502 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004503 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004504 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004505 char formatbuf[FORMATBUFLEN];
4506 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004507#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004508 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004509 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004510#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004511
Guido van Rossumda9c2711996-12-05 21:58:58 +00004512 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004513 if (*fmt == '(') {
4514 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004515 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004516 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004517 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004518
4519 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004521 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004522 goto error;
4523 }
4524 ++fmt;
4525 --fmtcnt;
4526 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004527 /* Skip over balanced parentheses */
4528 while (pcount > 0 && --fmtcnt >= 0) {
4529 if (*fmt == ')')
4530 --pcount;
4531 else if (*fmt == '(')
4532 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004533 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004534 }
4535 keylen = fmt - keystart - 1;
4536 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004537 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004538 "incomplete format key");
4539 goto error;
4540 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004541 key = PyString_FromStringAndSize(keystart,
4542 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004543 if (key == NULL)
4544 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004545 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004546 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004547 args_owned = 0;
4548 }
4549 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004550 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004551 if (args == NULL) {
4552 goto error;
4553 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004554 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004555 arglen = -1;
4556 argidx = -2;
4557 }
Guido van Rossume5372401993-03-16 12:15:04 +00004558 while (--fmtcnt >= 0) {
4559 switch (c = *fmt++) {
4560 case '-': flags |= F_LJUST; continue;
4561 case '+': flags |= F_SIGN; continue;
4562 case ' ': flags |= F_BLANK; continue;
4563 case '#': flags |= F_ALT; continue;
4564 case '0': flags |= F_ZERO; continue;
4565 }
4566 break;
4567 }
4568 if (c == '*') {
4569 v = getnextarg(args, arglen, &argidx);
4570 if (v == NULL)
4571 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004572 if (!PyInt_Check(v)) {
4573 PyErr_SetString(PyExc_TypeError,
4574 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004575 goto error;
4576 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004577 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004578 if (width < 0) {
4579 flags |= F_LJUST;
4580 width = -width;
4581 }
Guido van Rossume5372401993-03-16 12:15:04 +00004582 if (--fmtcnt >= 0)
4583 c = *fmt++;
4584 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004585 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004586 width = c - '0';
4587 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004588 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004589 if (!isdigit(c))
4590 break;
4591 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004592 PyErr_SetString(
4593 PyExc_ValueError,
4594 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004595 goto error;
4596 }
4597 width = width*10 + (c - '0');
4598 }
4599 }
4600 if (c == '.') {
4601 prec = 0;
4602 if (--fmtcnt >= 0)
4603 c = *fmt++;
4604 if (c == '*') {
4605 v = getnextarg(args, arglen, &argidx);
4606 if (v == NULL)
4607 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004608 if (!PyInt_Check(v)) {
4609 PyErr_SetString(
4610 PyExc_TypeError,
4611 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004612 goto error;
4613 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004614 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004615 if (prec < 0)
4616 prec = 0;
4617 if (--fmtcnt >= 0)
4618 c = *fmt++;
4619 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004620 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004621 prec = c - '0';
4622 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004623 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004624 if (!isdigit(c))
4625 break;
4626 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004627 PyErr_SetString(
4628 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004629 "prec too big");
4630 goto error;
4631 }
4632 prec = prec*10 + (c - '0');
4633 }
4634 }
4635 } /* prec */
4636 if (fmtcnt >= 0) {
4637 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004638 if (--fmtcnt >= 0)
4639 c = *fmt++;
4640 }
4641 }
4642 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004643 PyErr_SetString(PyExc_ValueError,
4644 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004645 goto error;
4646 }
4647 if (c != '%') {
4648 v = getnextarg(args, arglen, &argidx);
4649 if (v == NULL)
4650 goto error;
4651 }
4652 sign = 0;
4653 fill = ' ';
4654 switch (c) {
4655 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004656 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004657 len = 1;
4658 break;
4659 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004660#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004661 if (PyUnicode_Check(v)) {
4662 fmt = fmt_start;
4663 argidx = argidx_start;
4664 goto unicode;
4665 }
Georg Brandld45014b2005-10-01 17:06:00 +00004666#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004667 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004668#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004669 if (temp != NULL && PyUnicode_Check(temp)) {
4670 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004671 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004672 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004673 goto unicode;
4674 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004675#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004676 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004677 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004678 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004679 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004680 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004681 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004682 if (!PyString_Check(temp)) {
4683 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004684 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004685 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004686 goto error;
4687 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004688 pbuf = PyString_AS_STRING(temp);
4689 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004690 if (prec >= 0 && len > prec)
4691 len = prec;
4692 break;
4693 case 'i':
4694 case 'd':
4695 case 'u':
4696 case 'o':
4697 case 'x':
4698 case 'X':
4699 if (c == 'i')
4700 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004701 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004702 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004703 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004704 prec, c, &pbuf, &ilen);
4705 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004706 if (!temp)
4707 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004708 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004709 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004710 else {
4711 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004712 len = formatint(pbuf,
4713 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004714 flags, prec, c, v);
4715 if (len < 0)
4716 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004717 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004718 }
4719 if (flags & F_ZERO)
4720 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004721 break;
4722 case 'e':
4723 case 'E':
4724 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004725 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004726 case 'g':
4727 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004728 if (c == 'F')
4729 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004730 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004731 len = formatfloat(pbuf, sizeof(formatbuf),
4732 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004733 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004734 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004735 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004736 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004737 fill = '0';
4738 break;
4739 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004740#ifdef Py_USING_UNICODE
4741 if (PyUnicode_Check(v)) {
4742 fmt = fmt_start;
4743 argidx = argidx_start;
4744 goto unicode;
4745 }
4746#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004747 pbuf = formatbuf;
4748 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004749 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004750 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004751 break;
4752 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004753 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004754 "unsupported format character '%c' (0x%x) "
4755 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004756 c, c,
4757 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004758 goto error;
4759 }
4760 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004761 if (*pbuf == '-' || *pbuf == '+') {
4762 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004763 len--;
4764 }
4765 else if (flags & F_SIGN)
4766 sign = '+';
4767 else if (flags & F_BLANK)
4768 sign = ' ';
4769 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004770 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004771 }
4772 if (width < len)
4773 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004774 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004775 reslen -= rescnt;
4776 rescnt = width + fmtcnt + 100;
4777 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004778 if (reslen < 0) {
4779 Py_DECREF(result);
4780 return PyErr_NoMemory();
4781 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004782 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004783 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004784 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004785 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004786 }
4787 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004788 if (fill != ' ')
4789 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004790 rescnt--;
4791 if (width > len)
4792 width--;
4793 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004794 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4795 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004796 assert(pbuf[1] == c);
4797 if (fill != ' ') {
4798 *res++ = *pbuf++;
4799 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004800 }
Tim Petersfff53252001-04-12 18:38:48 +00004801 rescnt -= 2;
4802 width -= 2;
4803 if (width < 0)
4804 width = 0;
4805 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004806 }
4807 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004808 do {
4809 --rescnt;
4810 *res++ = fill;
4811 } while (--width > len);
4812 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004813 if (fill == ' ') {
4814 if (sign)
4815 *res++ = sign;
4816 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004817 (c == 'x' || c == 'X')) {
4818 assert(pbuf[0] == '0');
4819 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004820 *res++ = *pbuf++;
4821 *res++ = *pbuf++;
4822 }
4823 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004824 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004825 res += len;
4826 rescnt -= len;
4827 while (--width >= len) {
4828 --rescnt;
4829 *res++ = ' ';
4830 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004831 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004832 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004833 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004834 goto error;
4835 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004836 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004837 } /* '%' */
4838 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004839 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004840 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004841 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004842 goto error;
4843 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004844 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004845 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004846 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004847 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004848 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004849
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004850#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004851 unicode:
4852 if (args_owned) {
4853 Py_DECREF(args);
4854 args_owned = 0;
4855 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004856 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004857 if (PyTuple_Check(orig_args) && argidx > 0) {
4858 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004859 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004860 v = PyTuple_New(n);
4861 if (v == NULL)
4862 goto error;
4863 while (--n >= 0) {
4864 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4865 Py_INCREF(w);
4866 PyTuple_SET_ITEM(v, n, w);
4867 }
4868 args = v;
4869 } else {
4870 Py_INCREF(orig_args);
4871 args = orig_args;
4872 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004873 args_owned = 1;
4874 /* Take what we have of the result and let the Unicode formatting
4875 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004876 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004877 if (_PyString_Resize(&result, rescnt))
4878 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004879 fmtcnt = PyString_GET_SIZE(format) - \
4880 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004881 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4882 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004883 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004884 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004885 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004886 if (v == NULL)
4887 goto error;
4888 /* Paste what we have (result) to what the Unicode formatting
4889 function returned (v) and return the result (or error) */
4890 w = PyUnicode_Concat(result, v);
4891 Py_DECREF(result);
4892 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004893 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004894 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004895#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004896
Guido van Rossume5372401993-03-16 12:15:04 +00004897 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004898 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004899 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004900 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004901 }
Guido van Rossume5372401993-03-16 12:15:04 +00004902 return NULL;
4903}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004904
Guido van Rossum2a61e741997-01-18 07:55:05 +00004905void
Fred Drakeba096332000-07-09 07:04:36 +00004906PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004907{
4908 register PyStringObject *s = (PyStringObject *)(*p);
4909 PyObject *t;
4910 if (s == NULL || !PyString_Check(s))
4911 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004912 /* If it's a string subclass, we don't really know what putting
4913 it in the interned dict might do. */
4914 if (!PyString_CheckExact(s))
4915 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004916 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004917 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004918 if (interned == NULL) {
4919 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004920 if (interned == NULL) {
4921 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004922 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004923 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004924 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004925 t = PyDict_GetItem(interned, (PyObject *)s);
4926 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004927 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004928 Py_DECREF(*p);
4929 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004930 return;
4931 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004932
Armin Rigo79f7ad22004-08-07 19:27:39 +00004933 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004934 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004935 return;
4936 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004937 /* The two references in interned are not counted by refcnt.
4938 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004939 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004940 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004941}
4942
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004943void
4944PyString_InternImmortal(PyObject **p)
4945{
4946 PyString_InternInPlace(p);
4947 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4948 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4949 Py_INCREF(*p);
4950 }
4951}
4952
Guido van Rossum2a61e741997-01-18 07:55:05 +00004953
4954PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004955PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004956{
4957 PyObject *s = PyString_FromString(cp);
4958 if (s == NULL)
4959 return NULL;
4960 PyString_InternInPlace(&s);
4961 return s;
4962}
4963
Guido van Rossum8cf04761997-08-02 02:57:45 +00004964void
Fred Drakeba096332000-07-09 07:04:36 +00004965PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004966{
4967 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004968 for (i = 0; i < UCHAR_MAX + 1; i++) {
4969 Py_XDECREF(characters[i]);
4970 characters[i] = NULL;
4971 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004972 Py_XDECREF(nullstring);
4973 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004974}
Barry Warsawa903ad982001-02-23 16:40:48 +00004975
Barry Warsawa903ad982001-02-23 16:40:48 +00004976void _Py_ReleaseInternedStrings(void)
4977{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004978 PyObject *keys;
4979 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004980 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004981
4982 if (interned == NULL || !PyDict_Check(interned))
4983 return;
4984 keys = PyDict_Keys(interned);
4985 if (keys == NULL || !PyList_Check(keys)) {
4986 PyErr_Clear();
4987 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004988 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004989
4990 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4991 detector, interned strings are not forcibly deallocated; rather, we
4992 give them their stolen references back, and then clear and DECREF
4993 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004994
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004995 fprintf(stderr, "releasing interned strings\n");
4996 n = PyList_GET_SIZE(keys);
4997 for (i = 0; i < n; i++) {
4998 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4999 switch (s->ob_sstate) {
5000 case SSTATE_NOT_INTERNED:
5001 /* XXX Shouldn't happen */
5002 break;
5003 case SSTATE_INTERNED_IMMORTAL:
5004 s->ob_refcnt += 1;
5005 break;
5006 case SSTATE_INTERNED_MORTAL:
5007 s->ob_refcnt += 2;
5008 break;
5009 default:
5010 Py_FatalError("Inconsistent interned string state.");
5011 }
5012 s->ob_sstate = SSTATE_NOT_INTERNED;
5013 }
5014 Py_DECREF(keys);
5015 PyDict_Clear(interned);
5016 Py_DECREF(interned);
5017 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005018}