blob: 7e9378394b9c07d36790ed0ded3f06307d8a079f [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many requests were answered with the shared
   empty string and with a shared one-character string, respectively. */
int null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by the character value masked
   with UCHAR_MAX.  Entries start out NULL and are filled in the first time
   the corresponding one-character string is created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
26
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000054PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000055{
Tim Peters9e897f42001-05-09 07:37:07 +000056 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000057 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058 if (size == 0 && (op = nullstring) != NULL) {
59#ifdef COUNT_ALLOCS
60 null_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 if (size == 1 && str != NULL &&
66 (op = characters[*str & UCHAR_MAX]) != NULL)
67 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068#ifdef COUNT_ALLOCS
69 one_strings++;
70#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 Py_INCREF(op);
72 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000075 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000076 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000079 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000080 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000081 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000082 if (str != NULL)
83 memcpy(op->ob_sval, str, size);
84 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000085 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000086 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000087 PyObject *t = (PyObject *)op;
88 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000089 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000093 PyObject *t = (PyObject *)op;
94 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000095 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000103PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104{
Tim Peters62de65b2001-12-06 20:29:32 +0000105 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000106 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000107
108 assert(str != NULL);
109 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000110 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 Py_INCREF(op);
120 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000130 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000131 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000136 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000137 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000138 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000146 PyObject *t = (PyObject *)op;
147 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000148 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
177 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
178 ;
179
Tim Peters8931ff12006-05-13 23:28:20 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 }
254 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
255 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000275 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 memcpy(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
355
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 const char *encoding,
359 const char *errors)
360{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361 PyObject *v, *str;
362
363 str = PyString_FromStringAndSize(s, size);
364 if (str == NULL)
365 return NULL;
366 v = PyString_AsDecodedString(str, encoding, errors);
367 Py_DECREF(str);
368 return v;
369}
370
371PyObject *PyString_AsDecodedObject(PyObject *str,
372 const char *encoding,
373 const char *errors)
374{
375 PyObject *v;
376
377 if (!PyString_Check(str)) {
378 PyErr_BadArgument();
379 goto onError;
380 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382 if (encoding == NULL) {
383#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#else
386 PyErr_SetString(PyExc_ValueError, "no encoding specified");
387 goto onError;
388#endif
389 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390
391 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 v = PyCodec_Decode(str, encoding, errors);
393 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395
396 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000397
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 return NULL;
400}
401
402PyObject *PyString_AsDecodedString(PyObject *str,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v;
407
408 v = PyString_AsDecodedObject(str, encoding, errors);
409 if (v == NULL)
410 goto onError;
411
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000412#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000413 /* Convert Unicode to a string using the default encoding */
414 if (PyUnicode_Check(v)) {
415 PyObject *temp = v;
416 v = PyUnicode_AsEncodedString(v, NULL, NULL);
417 Py_DECREF(temp);
418 if (v == NULL)
419 goto onError;
420 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000421#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000422 if (!PyString_Check(v)) {
423 PyErr_Format(PyExc_TypeError,
424 "decoder did not return a string object (type=%.400s)",
425 v->ob_type->tp_name);
426 Py_DECREF(v);
427 goto onError;
428 }
429
430 return v;
431
432 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 return NULL;
434}
435
436PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000437 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 const char *encoding,
439 const char *errors)
440{
441 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000442
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443 str = PyString_FromStringAndSize(s, size);
444 if (str == NULL)
445 return NULL;
446 v = PyString_AsEncodedString(str, encoding, errors);
447 Py_DECREF(str);
448 return v;
449}
450
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 const char *encoding,
453 const char *errors)
454{
455 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000456
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457 if (!PyString_Check(str)) {
458 PyErr_BadArgument();
459 goto onError;
460 }
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462 if (encoding == NULL) {
463#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#else
466 PyErr_SetString(PyExc_ValueError, "no encoding specified");
467 goto onError;
468#endif
469 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470
471 /* Encode via the codec registry */
472 v = PyCodec_Encode(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
476 return v;
477
478 onError:
479 return NULL;
480}
481
482PyObject *PyString_AsEncodedString(PyObject *str,
483 const char *encoding,
484 const char *errors)
485{
486 PyObject *v;
487
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000488 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000489 if (v == NULL)
490 goto onError;
491
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000492#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 /* Convert Unicode to a string using the default encoding */
494 if (PyUnicode_Check(v)) {
495 PyObject *temp = v;
496 v = PyUnicode_AsEncodedString(v, NULL, NULL);
497 Py_DECREF(temp);
498 if (v == NULL)
499 goto onError;
500 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000501#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 if (!PyString_Check(v)) {
503 PyErr_Format(PyExc_TypeError,
504 "encoder did not return a string object (type=%.400s)",
505 v->ob_type->tp_name);
506 Py_DECREF(v);
507 goto onError;
508 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000511
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000512 onError:
513 return NULL;
514}
515
Guido van Rossum234f9421993-06-17 12:35:49 +0000516static void
Fred Drakeba096332000-07-09 07:04:36 +0000517string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000518{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000519 switch (PyString_CHECK_INTERNED(op)) {
520 case SSTATE_NOT_INTERNED:
521 break;
522
523 case SSTATE_INTERNED_MORTAL:
524 /* revive dead object temporarily for DelItem */
525 op->ob_refcnt = 3;
526 if (PyDict_DelItem(interned, op) != 0)
527 Py_FatalError(
528 "deletion of interned string failed");
529 break;
530
531 case SSTATE_INTERNED_IMMORTAL:
532 Py_FatalError("Immortal interned string died.");
533
534 default:
535 Py_FatalError("Inconsistent interned string state.");
536 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000537 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000538}
539
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540/* Unescape a backslash-escaped string. If unicode is non-zero,
541 the string is a u-literal. If recode_encoding is non-zero,
542 the string is UTF-8 encoded and should be re-encoded in the
543 specified encoding. */
544
545PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000548 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 const char *recode_encoding)
550{
551 int c;
552 char *p, *buf;
553 const char *end;
554 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000556 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000557 if (v == NULL)
558 return NULL;
559 p = buf = PyString_AsString(v);
560 end = s + len;
561 while (s < end) {
562 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000563 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000564#ifdef Py_USING_UNICODE
565 if (recode_encoding && (*s & 0x80)) {
566 PyObject *u, *w;
567 char *r;
568 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570 t = s;
571 /* Decode non-ASCII bytes as UTF-8. */
572 while (t < end && (*t & 0x80)) t++;
573 u = PyUnicode_DecodeUTF8(s, t - s, errors);
574 if(!u) goto failed;
575
576 /* Recode them in target encoding. */
577 w = PyUnicode_AsEncodedString(
578 u, recode_encoding, errors);
579 Py_DECREF(u);
580 if (!w) goto failed;
581
582 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000583 assert(PyString_Check(w));
584 r = PyString_AS_STRING(w);
585 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 memcpy(p, r, rn);
587 p += rn;
588 Py_DECREF(w);
589 s = t;
590 } else {
591 *p++ = *s++;
592 }
593#else
594 *p++ = *s++;
595#endif
596 continue;
597 }
598 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000599 if (s==end) {
600 PyErr_SetString(PyExc_ValueError,
601 "Trailing \\ in string");
602 goto failed;
603 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000604 switch (*s++) {
605 /* XXX This assumes ASCII! */
606 case '\n': break;
607 case '\\': *p++ = '\\'; break;
608 case '\'': *p++ = '\''; break;
609 case '\"': *p++ = '\"'; break;
610 case 'b': *p++ = '\b'; break;
611 case 'f': *p++ = '\014'; break; /* FF */
612 case 't': *p++ = '\t'; break;
613 case 'n': *p++ = '\n'; break;
614 case 'r': *p++ = '\r'; break;
615 case 'v': *p++ = '\013'; break; /* VT */
616 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 c = s[-1] - '0';
620 if ('0' <= *s && *s <= '7') {
621 c = (c<<3) + *s++ - '0';
622 if ('0' <= *s && *s <= '7')
623 c = (c<<3) + *s++ - '0';
624 }
625 *p++ = c;
626 break;
627 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000628 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 && isxdigit(Py_CHARMASK(s[1]))) {
630 unsigned int x = 0;
631 c = Py_CHARMASK(*s);
632 s++;
633 if (isdigit(c))
634 x = c - '0';
635 else if (islower(c))
636 x = 10 + c - 'a';
637 else
638 x = 10 + c - 'A';
639 x = x << 4;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x += c - '0';
644 else if (islower(c))
645 x += 10 + c - 'a';
646 else
647 x += 10 + c - 'A';
648 *p++ = x;
649 break;
650 }
651 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656 if (strcmp(errors, "replace") == 0) {
657 *p++ = '?';
658 } else if (strcmp(errors, "ignore") == 0)
659 /* do nothing */;
660 else {
661 PyErr_Format(PyExc_ValueError,
662 "decoding error; "
663 "unknown error handling code: %.400s",
664 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#ifndef Py_USING_UNICODE
668 case 'u':
669 case 'U':
670 case 'N':
671 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000672 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 "Unicode escapes not legal "
674 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000675 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677#endif
678 default:
679 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000680 s--;
681 goto non_esc; /* an arbitry number of unescaped
682 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000683 }
684 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000685 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 return v;
688 failed:
689 Py_DECREF(v);
690 return NULL;
691}
692
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694string_getsize(register PyObject *op)
695{
696 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (PyString_AsStringAndSize(op, &s, &len))
699 return -1;
700 return len;
701}
702
703static /*const*/ char *
704string_getbuffer(register PyObject *op)
705{
706 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000707 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000708 if (PyString_AsStringAndSize(op, &s, &len))
709 return NULL;
710 return s;
711}
712
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000714PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (!PyString_Check(op))
717 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000718 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719}
720
721/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000722PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 if (!PyString_Check(op))
725 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727}
728
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729int
730PyString_AsStringAndSize(register PyObject *obj,
731 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000732 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733{
734 if (s == NULL) {
735 PyErr_BadInternalCall();
736 return -1;
737 }
738
739 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000740#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 if (PyUnicode_Check(obj)) {
742 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
743 if (obj == NULL)
744 return -1;
745 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000746 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000747#endif
748 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_Format(PyExc_TypeError,
750 "expected string or Unicode object, "
751 "%.200s found", obj->ob_type->tp_name);
752 return -1;
753 }
754 }
755
756 *s = PyString_AS_STRING(obj);
757 if (len != NULL)
758 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000759 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_SetString(PyExc_TypeError,
761 "expected string without null bytes");
762 return -1;
763 }
764 return 0;
765}
766
/* -------------------------------------------------------------------- */
/* stringlib components */

/* Macros defined ahead of the stringlib headers included below.
   NOTE(review): presumably these headers are generic templates that are
   specialised through the STRINGLIB_* macros -- confirm in stringlib/. */

/* element type: plain bytes */
#define STRINGLIB_CHAR char

/* constructor taking (buffer, length) */
#define STRINGLIB_NEW PyString_FromStringAndSize
/* buffer comparison */
#define STRINGLIB_CMP memcmp

/* the shared empty string.
   NOTE(review): `nullstring' stays NULL until the first empty string has
   been created -- confirm the stringlib users tolerate that. */
#define STRINGLIB_EMPTY nullstring

#include "stringlib/fastsearch.h"

#include "stringlib/find.h"
#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000781
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783/* Methods */
784
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000785static int
Fred Drakeba096332000-07-09 07:04:36 +0000786string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000788 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000791
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000792 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000793 if (! PyString_CheckExact(op)) {
794 int ret;
795 /* A str subclass may have its own __str__ method. */
796 op = (PyStringObject *) PyObject_Str((PyObject *)op);
797 if (op == NULL)
798 return -1;
799 ret = string_print(op, fp, flags);
800 Py_DECREF(op);
801 return ret;
802 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000803 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000804#ifdef __VMS
805 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
806#else
807 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
808#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000809 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000811
Thomas Wouters7e474022000-07-16 12:04:32 +0000812 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000813 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000814 if (memchr(op->ob_sval, '\'', op->ob_size) &&
815 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '"';
817
818 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000819 for (i = 0; i < op->ob_size; i++) {
820 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000821 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000822 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000823 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000824 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000825 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000827 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000828 fprintf(fp, "\\r");
829 else if (c < ' ' || c >= 0x7f)
830 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000831 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000832 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000833 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000834 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000835 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836}
837
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000838PyObject *
839PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000840{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000842 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000843 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000844 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000845 PyErr_SetString(PyExc_OverflowError,
846 "string is too large to make repr");
847 }
848 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000850 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 }
852 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000853 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 register char c;
855 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000856 int quote;
857
Thomas Wouters7e474022000-07-16 12:04:32 +0000858 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000859 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000860 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000861 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000862 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000863 quote = '"';
864
Tim Peters9161c8b2001-12-03 01:55:38 +0000865 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000866 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000868 /* There's at least enough room for a hex escape
869 and a closing quote. */
870 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000872 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000874 else if (c == '\t')
875 *p++ = '\\', *p++ = 't';
876 else if (c == '\n')
877 *p++ = '\\', *p++ = 'n';
878 else if (c == '\r')
879 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000880 else if (c < ' ' || c >= 0x7f) {
881 /* For performance, we don't want to call
882 PyOS_snprintf here (extra layers of
883 function call). */
884 sprintf(p, "\\x%02x", c & 0xff);
885 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000886 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000887 else
888 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000889 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000890 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000891 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000893 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000894 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000895 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897}
898
Guido van Rossum189f1df2001-05-01 16:51:53 +0000899static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000900string_repr(PyObject *op)
901{
902 return PyString_Repr(op, 1);
903}
904
905static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000906string_str(PyObject *s)
907{
Tim Petersc9933152001-10-16 20:18:24 +0000908 assert(PyString_Check(s));
909 if (PyString_CheckExact(s)) {
910 Py_INCREF(s);
911 return s;
912 }
913 else {
914 /* Subtype -- return genuine string with the same value. */
915 PyStringObject *t = (PyStringObject *) s;
916 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
917 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000918}
919
Martin v. Löwis18e16552006-02-15 17:27:45 +0000920static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000921string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922{
923 return a->ob_size;
924}
925
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000927string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928{
Andrew Dalke598710c2006-05-25 18:18:39 +0000929 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 register PyStringObject *op;
931 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000932#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000933 if (PyUnicode_Check(bb))
934 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000935#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000936 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000937 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000938 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939 return NULL;
940 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000943 if ((a->ob_size == 0 || b->ob_size == 0) &&
944 PyString_CheckExact(a) && PyString_CheckExact(b)) {
945 if (a->ob_size == 0) {
946 Py_INCREF(bb);
947 return bb;
948 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000949 Py_INCREF(a);
950 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951 }
952 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000953 if (size < 0) {
954 PyErr_SetString(PyExc_OverflowError,
955 "strings are too large to concat");
956 return NULL;
957 }
958
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000959 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000960 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000961 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000963 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000964 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000965 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000966 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
967 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000968 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970#undef b
971}
972
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000973static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000974string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000975{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000976 register Py_ssize_t i;
977 register Py_ssize_t j;
978 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000979 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000980 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981 if (n < 0)
982 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000983 /* watch out for overflows: the size can overflow int,
984 * and the # of bytes needed can overflow size_t
985 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000987 if (n && size / n != a->ob_size) {
988 PyErr_SetString(PyExc_OverflowError,
989 "repeated string is too long");
990 return NULL;
991 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000992 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 Py_INCREF(a);
994 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000995 }
Tim Peterse7c05322004-06-27 17:24:49 +0000996 nbytes = (size_t)size;
997 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000998 PyErr_SetString(PyExc_OverflowError,
999 "repeated string is too long");
1000 return NULL;
1001 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001002 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001003 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001004 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001006 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001007 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001008 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001009 op->ob_sval[size] = '\0';
1010 if (a->ob_size == 1 && n > 0) {
1011 memset(op->ob_sval, a->ob_sval[0] , n);
1012 return (PyObject *) op;
1013 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001014 i = 0;
1015 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001016 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1017 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001018 }
1019 while (i < size) {
1020 j = (i <= size-i) ? i : size-i;
1021 memcpy(op->ob_sval+i, op->ob_sval, j);
1022 i += j;
1023 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025}
1026
1027/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1028
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001029static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001030string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001031 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001032 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001033{
1034 if (i < 0)
1035 i = 0;
1036 if (j < 0)
1037 j = 0; /* Avoid signed/unsigned bug in next line */
1038 if (j > a->ob_size)
1039 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001040 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1041 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 Py_INCREF(a);
1043 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001044 }
1045 if (j < i)
1046 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001047 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048}
1049
Guido van Rossum9284a572000-03-07 15:53:43 +00001050static int
Fred Drakeba096332000-07-09 07:04:36 +00001051string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001052{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001053 char *s = PyString_AS_STRING(a);
1054 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001055 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001056 Py_ssize_t pos;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001057
1058 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001059#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001060 if (PyUnicode_Check(el))
1061 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001062#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001063 if (!PyString_Check(el)) {
1064 PyErr_SetString(PyExc_TypeError,
1065 "'in <string>' requires string as left operand");
1066 return -1;
1067 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001068 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001069
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001070 if (len_sub == 0)
1071 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001072
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001073 pos = fastsearch(
1074 s, PyString_GET_SIZE(a),
1075 sub, len_sub, FAST_SEARCH
1076 );
Fredrik Lundh3a65d872006-05-26 17:31:41 +00001077
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001078 return (pos != -1);
Guido van Rossum9284a572000-03-07 15:53:43 +00001079}
1080
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001081static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001082string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001083{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001084 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001085 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001087 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088 return NULL;
1089 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001090 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001091 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001092 if (v == NULL)
1093 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001094 else {
1095#ifdef COUNT_ALLOCS
1096 one_strings++;
1097#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001098 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001099 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001100 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001101}
1102
Martin v. Löwiscd353062001-05-24 16:56:35 +00001103static PyObject*
1104string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001105{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001107 Py_ssize_t len_a, len_b;
1108 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001109 PyObject *result;
1110
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001111 /* Make sure both arguments are strings. */
1112 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001113 result = Py_NotImplemented;
1114 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001115 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 if (a == b) {
1117 switch (op) {
1118 case Py_EQ:case Py_LE:case Py_GE:
1119 result = Py_True;
1120 goto out;
1121 case Py_NE:case Py_LT:case Py_GT:
1122 result = Py_False;
1123 goto out;
1124 }
1125 }
1126 if (op == Py_EQ) {
1127 /* Supporting Py_NE here as well does not save
1128 much time, since Py_NE is rarely used. */
1129 if (a->ob_size == b->ob_size
1130 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001131 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001132 a->ob_size) == 0)) {
1133 result = Py_True;
1134 } else {
1135 result = Py_False;
1136 }
1137 goto out;
1138 }
1139 len_a = a->ob_size; len_b = b->ob_size;
1140 min_len = (len_a < len_b) ? len_a : len_b;
1141 if (min_len > 0) {
1142 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1143 if (c==0)
1144 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1145 }else
1146 c = 0;
1147 if (c == 0)
1148 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1149 switch (op) {
1150 case Py_LT: c = c < 0; break;
1151 case Py_LE: c = c <= 0; break;
1152 case Py_EQ: assert(0); break; /* unreachable */
1153 case Py_NE: c = c != 0; break;
1154 case Py_GT: c = c > 0; break;
1155 case Py_GE: c = c >= 0; break;
1156 default:
1157 result = Py_NotImplemented;
1158 goto out;
1159 }
1160 result = c ? Py_True : Py_False;
1161 out:
1162 Py_INCREF(result);
1163 return result;
1164}
1165
1166int
1167_PyString_Eq(PyObject *o1, PyObject *o2)
1168{
1169 PyStringObject *a, *b;
1170 a = (PyStringObject*)o1;
1171 b = (PyStringObject*)o2;
1172 return a->ob_size == b->ob_size
1173 && *a->ob_sval == *b->ob_sval
1174 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001175}
1176
Guido van Rossum9bfef441993-03-29 10:43:31 +00001177static long
Fred Drakeba096332000-07-09 07:04:36 +00001178string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001179{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001180 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001181 register unsigned char *p;
1182 register long x;
1183
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 if (a->ob_shash != -1)
1185 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001186 len = a->ob_size;
1187 p = (unsigned char *) a->ob_sval;
1188 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001190 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001191 x ^= a->ob_size;
1192 if (x == -1)
1193 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001194 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001195 return x;
1196}
1197
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001198#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1199
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001200static PyObject*
1201string_subscript(PyStringObject* self, PyObject* item)
1202{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001203 PyNumberMethods *nb = item->ob_type->tp_as_number;
1204 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1205 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 if (i == -1 && PyErr_Occurred())
1207 return NULL;
1208 if (i < 0)
1209 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001210 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001211 }
1212 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001213 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214 char* source_buf;
1215 char* result_buf;
1216 PyObject* result;
1217
Tim Petersae1d0c92006-03-17 03:29:34 +00001218 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 PyString_GET_SIZE(self),
1220 &start, &stop, &step, &slicelength) < 0) {
1221 return NULL;
1222 }
1223
1224 if (slicelength <= 0) {
1225 return PyString_FromStringAndSize("", 0);
1226 }
1227 else {
1228 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001229 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001230 if (result_buf == NULL)
1231 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232
Tim Petersae1d0c92006-03-17 03:29:34 +00001233 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 cur += step, i++) {
1235 result_buf[i] = source_buf[cur];
1236 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001237
1238 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001239 slicelength);
1240 PyMem_Free(result_buf);
1241 return result;
1242 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001243 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001245 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246 "string indices must be integers");
1247 return NULL;
1248 }
1249}
1250
Martin v. Löwis18e16552006-02-15 17:27:45 +00001251static Py_ssize_t
1252string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001253{
1254 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001255 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001256 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257 return -1;
1258 }
1259 *ptr = (void *)self->ob_sval;
1260 return self->ob_size;
1261}
1262
Martin v. Löwis18e16552006-02-15 17:27:45 +00001263static Py_ssize_t
1264string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001265{
Guido van Rossum045e6881997-09-08 18:30:11 +00001266 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001267 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268 return -1;
1269}
1270
Martin v. Löwis18e16552006-02-15 17:27:45 +00001271static Py_ssize_t
1272string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001273{
1274 if ( lenp )
1275 *lenp = self->ob_size;
1276 return 1;
1277}
1278
Martin v. Löwis18e16552006-02-15 17:27:45 +00001279static Py_ssize_t
1280string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001281{
1282 if ( index != 0 ) {
1283 PyErr_SetString(PyExc_SystemError,
1284 "accessing non-existent string segment");
1285 return -1;
1286 }
1287 *ptr = self->ob_sval;
1288 return self->ob_size;
1289}
1290
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001291static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001293 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001294 (ssizeargfunc)string_repeat, /*sq_repeat*/
1295 (ssizeargfunc)string_item, /*sq_item*/
1296 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001297 0, /*sq_ass_item*/
1298 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001299 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001300};
1301
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001302static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001303 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001304 (binaryfunc)string_subscript,
1305 0,
1306};
1307
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001308static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001309 (readbufferproc)string_buffer_getreadbuf,
1310 (writebufferproc)string_buffer_getwritebuf,
1311 (segcountproc)string_buffer_getsegcount,
1312 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001313};
1314
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315
1316
/* Strip modes; each value is also the index of its entry in
   stripformat[]. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2

/* Format strings, indexed by the strip mode */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Method name for strip mode i: the format string with its first three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001325
Andrew Dalke525eab32006-05-26 14:00:45 +00001326
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and only the bytes in between go through memcmp().
   Don't call if length < 2: the memcmp() count is length-2.
   Every parameter is parenthesized so that expression arguments expand
   correctly; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
	((target)[(offset)] == (pattern)[0] &&				\
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&	\
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1332
1333
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   element than the number of splits (5 splits gives 6 elements), capped
   at MAX_PREALLOC.  The parameter and the whole expansion are
   parenthesized so the macro is safe with expression arguments and
   inside larger expressions. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1346
/* Append the substring data[left:right] to `list` as a new string.
   Uses the variables `str` and `list` and the label `onError` of the
   enclosing function; jumps to onError if creating or appending the
   string fails.  NOTE: expands to several statements without enclosing
   braces, so it must not be the unbraced body of an if/else or loop. */
#define SPLIT_APPEND(data, left, right)				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (PyList_Append(list, str) == 0) {			\
		Py_DECREF(str);					\
	}							\
	else {							\
		Py_DECREF(str);					\
		goto onError;					\
	}
1358
/* Add the substring data[left:right] to `list` as element number
   `count`, then increment `count`.  The first MAX_PREALLOC elements are
   stored with PyList_SET_ITEM; later ones are appended.
   Uses the variables `str`, `list`, `count` and the label `onError` of
   the enclosing function; jumps to onError on failure. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count >= MAX_PREALLOC) {				\
		if (PyList_Append(list, str) != 0) {		\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		Py_DECREF(str);					\
	} else {						\
		PyList_SET_ITEM(list, count, str);		\
	}							\
	count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001375
/* Always force the list to the expected size: set the ob_size of `list`
   to the `count` variable of the enclosing function.  The parameter is
   parenthesized so the cast applies to the whole argument.  The trailing
   semicolon is kept so existing call sites expand as before. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;
1378
/* Index-advancing helpers over the character buffer `s`; `i` must be a
   modifiable lvalue.  Parameters are parenthesized so that expression
   arguments expand correctly.
     SKIP_SPACE / SKIP_NONSPACE    move i forward while i < len and
                                   s[i] is / is not whitespace.
     RSKIP_SPACE / RSKIP_NONSPACE  move i backward while i >= 0 and
                                   s[i] is / is not whitespace. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 &&  isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1383
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001384Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001385split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001386{
Andrew Dalke525eab32006-05-26 14:00:45 +00001387 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001388 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001389 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390
1391 if (list == NULL)
1392 return NULL;
1393
Andrew Dalke02758d62006-05-26 15:21:01 +00001394 i = j = 0;
1395
1396 while (maxsplit-- > 0) {
1397 SKIP_SPACE(s, i, len);
1398 if (i==len) break;
1399 j = i; i++;
1400 SKIP_NONSPACE(s, i, len);
1401 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001403
1404 if (i < len) {
1405 /* Only occurs when maxsplit was reached */
1406 /* Skip any remaining whitespace and copy to end of string */
1407 SKIP_SPACE(s, i, len);
1408 if (i != len)
1409 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001410 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001411 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001413 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414 Py_DECREF(list);
1415 return NULL;
1416}
1417
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001418Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001419split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001420{
Andrew Dalke525eab32006-05-26 14:00:45 +00001421 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001422 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001423 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001424
1425 if (list == NULL)
1426 return NULL;
1427
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001428 i = j = 0;
1429 while ((j < len) && (maxcount-- > 0)) {
1430 for(; j<len; j++) {
1431 /* I found that using memchr makes no difference */
1432 if (s[j] == ch) {
1433 SPLIT_ADD(s, i, j);
1434 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001435 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001436 }
1437 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001438 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001439 if (i <= len) {
1440 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001441 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001442 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443 return list;
1444
1445 onError:
1446 Py_DECREF(list);
1447 return NULL;
1448}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001450PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001451"S.split([sep [,maxsplit]]) -> list of strings\n\
1452\n\
1453Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001454delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001455splits are done. If sep is not specified or is None, any\n\
1456whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457
1458static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001459string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001461 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001463 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001464 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001465#ifdef USE_FAST
1466 Py_ssize_t pos;
1467#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468
Martin v. Löwis9c830762006-04-13 08:37:17 +00001469 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001471 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001472 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001475 if (PyString_Check(subobj)) {
1476 sub = PyString_AS_STRING(subobj);
1477 n = PyString_GET_SIZE(subobj);
1478 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001479#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001480 else if (PyUnicode_Check(subobj))
1481 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001482#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1484 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001485
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486 if (n == 0) {
1487 PyErr_SetString(PyExc_ValueError, "empty separator");
1488 return NULL;
1489 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001490 else if (n == 1)
1491 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492
Andrew Dalke525eab32006-05-26 14:00:45 +00001493 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 if (list == NULL)
1495 return NULL;
1496
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001497#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001499 while (maxsplit-- > 0) {
1500 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1501 if (pos < 0)
1502 break;
1503 j = i+pos;
1504 SPLIT_ADD(s, i, j);
1505 i = j + n;
1506
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001508#else
1509 i = j = 0;
1510 while ((j+n <= len) && (maxsplit-- > 0)) {
1511 for (; j+n <= len; j++) {
1512 if (Py_STRING_MATCH(s, j, sub, n)) {
1513 SPLIT_ADD(s, i, j);
1514 i = j = j + n;
1515 break;
1516 }
1517 }
1518 }
1519#endif
1520 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001521 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return list;
1523
Andrew Dalke525eab32006-05-26 14:00:45 +00001524 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525 Py_DECREF(list);
1526 return NULL;
1527}
1528
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001529PyDoc_STRVAR(partition__doc__,
1530"S.partition(sep) -> (head, sep, tail)\n\
1531\n\
1532Searches for the separator sep in S, and returns the part before it,\n\
1533the separator itself, and the part after it. If the separator is not\n\
1534found, returns S and two empty strings.");
1535
1536static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001537string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001538{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001539 const char *sep;
1540 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001541
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001542 if (PyString_Check(sep_obj)) {
1543 sep = PyString_AS_STRING(sep_obj);
1544 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001545 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001546#ifdef Py_USING_UNICODE
1547 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001548 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001550 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001551 return NULL;
1552
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001553 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001554 (PyObject*) self,
1555 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1556 sep_obj, sep, sep_len
1557 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558}
1559
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001560PyDoc_STRVAR(rpartition__doc__,
1561"S.rpartition(sep) -> (head, sep, tail)\n\
1562\n\
1563Searches for the separator sep in S, starting at the end of S, and returns\n\
1564the part before it, the separator itself, and the part after it. If the\n\
1565separator is not found, returns S and two empty strings.");
1566
1567static PyObject *
1568string_rpartition(PyStringObject *self, PyObject *sep_obj)
1569{
1570 const char *sep;
1571 Py_ssize_t sep_len;
1572
1573 if (PyString_Check(sep_obj)) {
1574 sep = PyString_AS_STRING(sep_obj);
1575 sep_len = PyString_GET_SIZE(sep_obj);
1576 }
1577#ifdef Py_USING_UNICODE
1578 else if (PyUnicode_Check(sep_obj))
1579 return PyUnicode_Partition((PyObject *) self, sep_obj);
1580#endif
1581 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1582 return NULL;
1583
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001584 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001585 (PyObject*) self,
1586 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587 sep_obj, sep, sep_len
1588 );
1589}
1590
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001591Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001592rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001593{
Andrew Dalke525eab32006-05-26 14:00:45 +00001594 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001595 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001596 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597
1598 if (list == NULL)
1599 return NULL;
1600
Andrew Dalke02758d62006-05-26 15:21:01 +00001601 i = j = len-1;
1602
1603 while (maxsplit-- > 0) {
1604 RSKIP_SPACE(s, i);
1605 if (i<0) break;
1606 j = i; i--;
1607 RSKIP_NONSPACE(s, i);
1608 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001609 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001610 if (i >= 0) {
1611 /* Only occurs when maxsplit was reached */
1612 /* Skip any remaining whitespace and copy to beginning of string */
1613 RSKIP_SPACE(s, i);
1614 if (i >= 0)
1615 SPLIT_ADD(s, 0, i + 1);
1616
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001618 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001619 if (PyList_Reverse(list) < 0)
1620 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001622 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001623 Py_DECREF(list);
1624 return NULL;
1625}
1626
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001627Py_LOCAL(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001628rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629{
Andrew Dalke525eab32006-05-26 14:00:45 +00001630 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001632 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633
1634 if (list == NULL)
1635 return NULL;
1636
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001637 i = j = len - 1;
1638 while ((i >= 0) && (maxcount-- > 0)) {
1639 for (; i >= 0; i--) {
1640 if (s[i] == ch) {
1641 SPLIT_ADD(s, i + 1, j + 1);
1642 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001644 }
1645 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001646 }
1647 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001648 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001649 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001650 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001651 if (PyList_Reverse(list) < 0)
1652 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653 return list;
1654
1655 onError:
1656 Py_DECREF(list);
1657 return NULL;
1658}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659
1660PyDoc_STRVAR(rsplit__doc__,
1661"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1662\n\
1663Return a list of the words in the string S, using sep as the\n\
1664delimiter string, starting at the end of the string and working\n\
1665to the front. If maxsplit is given, at most maxsplit splits are\n\
1666done. If sep is not specified or is None, any whitespace string\n\
1667is a separator.");
1668
1669static PyObject *
1670string_rsplit(PyStringObject *self, PyObject *args)
1671{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001673 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001675 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676
Martin v. Löwis9c830762006-04-13 08:37:17 +00001677 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 return NULL;
1679 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001680 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681 if (subobj == Py_None)
1682 return rsplit_whitespace(s, len, maxsplit);
1683 if (PyString_Check(subobj)) {
1684 sub = PyString_AS_STRING(subobj);
1685 n = PyString_GET_SIZE(subobj);
1686 }
1687#ifdef Py_USING_UNICODE
1688 else if (PyUnicode_Check(subobj))
1689 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1690#endif
1691 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1692 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001693
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 if (n == 0) {
1695 PyErr_SetString(PyExc_ValueError, "empty separator");
1696 return NULL;
1697 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001698 else if (n == 1)
1699 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700
Andrew Dalke525eab32006-05-26 14:00:45 +00001701 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702 if (list == NULL)
1703 return NULL;
1704
1705 j = len;
1706 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001707
1708 while ( (i >= 0) && (maxsplit-- > 0) ) {
1709 for (; i>=0; i--) {
1710 if (Py_STRING_MATCH(s, i, sub, n)) {
1711 SPLIT_ADD(s, i + n, j);
1712 j = i;
1713 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001715 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001716 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001717 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001718 SPLIT_ADD(s, 0, j);
1719 FIX_PREALLOC_SIZE(list);
1720 if (PyList_Reverse(list) < 0)
1721 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 return list;
1723
Andrew Dalke525eab32006-05-26 14:00:45 +00001724onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001725 Py_DECREF(list);
1726 return NULL;
1727}
1728
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001730PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731"S.join(sequence) -> string\n\
1732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001734sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735
1736static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001737string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738{
1739 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001740 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001745 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001746 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 seq = PySequence_Fast(orig, "");
1749 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001750 return NULL;
1751 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001752
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001753 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001754 if (seqlen == 0) {
1755 Py_DECREF(seq);
1756 return PyString_FromString("");
1757 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001759 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001760 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1761 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001763 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001768 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001769 * Do a pre-pass to figure out the total amount of space we'll
1770 * need (sz), see whether any argument is absurd, and defer to
1771 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775 item = PySequence_Fast_GET_ITEM(seq, i);
1776 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001777#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001779 /* Defer to Unicode join.
1780 * CAUTION: There's no gurantee that the
1781 * original sequence can be iterated over
1782 * again, so we must pass seq here.
1783 */
1784 PyObject *result;
1785 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001786 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001787 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001789#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001791 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001792 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001793 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 Py_DECREF(seq);
1795 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001796 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001797 sz += PyString_GET_SIZE(item);
1798 if (i != 0)
1799 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001800 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 PyErr_SetString(PyExc_OverflowError,
1802 "join() is too long for a Python string");
1803 Py_DECREF(seq);
1804 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 }
1807
1808 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001809 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 if (res == NULL) {
1811 Py_DECREF(seq);
1812 return NULL;
1813 }
1814
1815 /* Catenate everything. */
1816 p = PyString_AS_STRING(res);
1817 for (i = 0; i < seqlen; ++i) {
1818 size_t n;
1819 item = PySequence_Fast_GET_ITEM(seq, i);
1820 n = PyString_GET_SIZE(item);
1821 memcpy(p, PyString_AS_STRING(item), n);
1822 p += n;
1823 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001824 memcpy(p, sep, seplen);
1825 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001828
Jeremy Hylton49048292000-07-11 03:28:17 +00001829 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831}
1832
Tim Peters52e155e2001-06-16 05:42:57 +00001833PyObject *
1834_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001835{
Tim Petersa7259592001-06-16 05:11:17 +00001836 assert(sep != NULL && PyString_Check(sep));
1837 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001839}
1840
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001841Py_LOCAL(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001843{
1844 if (*end > len)
1845 *end = len;
1846 else if (*end < 0)
1847 *end += len;
1848 if (*end < 0)
1849 *end = 0;
1850 if (*start < 0)
1851 *start += len;
1852 if (*start < 0)
1853 *start = 0;
1854}
1855
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001856Py_LOCAL(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001857string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001860 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001861 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001862 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863
Martin v. Löwis18e16552006-02-15 17:27:45 +00001864 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001865 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001866 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001867 return -2;
1868 if (PyString_Check(subobj)) {
1869 sub = PyString_AS_STRING(subobj);
1870 n = PyString_GET_SIZE(subobj);
1871 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001872#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001873 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001874 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001875#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001876 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877 return -2;
1878
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001879 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001881 if (dir > 0)
1882 return stringlib_find(s+i, last-i, sub, n, i);
1883 else
1884 return stringlib_rfind(s+i, last-i, sub, n, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001885}
1886
1887
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001888PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889"S.find(sub [,start [,end]]) -> int\n\
1890\n\
1891Return the lowest index in S where substring sub is found,\n\
1892such that sub is contained within s[start,end]. Optional\n\
1893arguments start and end are interpreted as in slice notation.\n\
1894\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001895Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896
1897static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001898string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001900 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901 if (result == -2)
1902 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001903 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904}
1905
1906
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001907PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908"S.index(sub [,start [,end]]) -> int\n\
1909\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001910Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911
1912static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001913string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001915 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916 if (result == -2)
1917 return NULL;
1918 if (result == -1) {
1919 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001920 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921 return NULL;
1922 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001923 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924}
1925
1926
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001927PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928"S.rfind(sub [,start [,end]]) -> int\n\
1929\n\
1930Return the highest index in S where substring sub is found,\n\
1931such that sub is contained within s[start,end]. Optional\n\
1932arguments start and end are interpreted as in slice notation.\n\
1933\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001934Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935
1936static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001937string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001939 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940 if (result == -2)
1941 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001942 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943}
1944
1945
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001946PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947"S.rindex(sub [,start [,end]]) -> int\n\
1948\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001949Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950
1951static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001952string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001954 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 if (result == -2)
1956 return NULL;
1957 if (result == -1) {
1958 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001959 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960 return NULL;
1961 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001962 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963}
1964
1965
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001966Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001967do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1968{
1969 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001970 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001972 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1973 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001974
1975 i = 0;
1976 if (striptype != RIGHTSTRIP) {
1977 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1978 i++;
1979 }
1980 }
1981
1982 j = len;
1983 if (striptype != LEFTSTRIP) {
1984 do {
1985 j--;
1986 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1987 j++;
1988 }
1989
1990 if (i == 0 && j == len && PyString_CheckExact(self)) {
1991 Py_INCREF(self);
1992 return (PyObject*)self;
1993 }
1994 else
1995 return PyString_FromStringAndSize(s+i, j-i);
1996}
1997
1998
Fredrik Lundh7c940d12006-05-26 16:32:42 +00001999Py_LOCAL(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002000do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001{
2002 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002003 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005 i = 0;
2006 if (striptype != RIGHTSTRIP) {
2007 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2008 i++;
2009 }
2010 }
2011
2012 j = len;
2013 if (striptype != LEFTSTRIP) {
2014 do {
2015 j--;
2016 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2017 j++;
2018 }
2019
Tim Peters8fa5dd02001-09-12 02:18:30 +00002020 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 Py_INCREF(self);
2022 return (PyObject*)self;
2023 }
2024 else
2025 return PyString_FromStringAndSize(s+i, j-i);
2026}
2027
2028
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002029Py_LOCAL(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002030do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2031{
2032 PyObject *sep = NULL;
2033
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002034 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002035 return NULL;
2036
2037 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002038 if (PyString_Check(sep))
2039 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002040#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002041 else if (PyUnicode_Check(sep)) {
2042 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2043 PyObject *res;
2044 if (uniself==NULL)
2045 return NULL;
2046 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2047 striptype, sep);
2048 Py_DECREF(uniself);
2049 return res;
2050 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002051#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002052 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002053#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002054 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002056 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002058 STRIPNAME(striptype));
2059 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002060 }
2061
2062 return do_strip(self, striptype);
2063}
2064
2065
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002066PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002067"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002068\n\
2069Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002070whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002071If chars is given and not None, remove characters in chars instead.\n\
2072If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002073
2074static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002075string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002077 if (PyTuple_GET_SIZE(args) == 0)
2078 return do_strip(self, BOTHSTRIP); /* Common case */
2079 else
2080 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081}
2082
2083
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002084PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002085"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002087Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002088If chars is given and not None, remove characters in chars instead.\n\
2089If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090
2091static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002092string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002094 if (PyTuple_GET_SIZE(args) == 0)
2095 return do_strip(self, LEFTSTRIP); /* Common case */
2096 else
2097 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002098}
2099
2100
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002101PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002102"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002104Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002105If chars is given and not None, remove characters in chars instead.\n\
2106If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107
2108static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002109string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002111 if (PyTuple_GET_SIZE(args) == 0)
2112 return do_strip(self, RIGHTSTRIP); /* Common case */
2113 else
2114 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115}
2116
2117
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002118PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119"S.lower() -> string\n\
2120\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002121Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002123/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2124#ifndef _tolower
2125#define _tolower tolower
2126#endif
2127
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002129string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002131 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002132 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002133 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002135 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002136 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002138
2139 s = PyString_AS_STRING(newobj);
2140
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002141 memcpy(s, PyString_AS_STRING(self), n);
2142
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002144 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002145 if (isupper(c))
2146 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002148
Anthony Baxtera6286212006-04-11 07:42:36 +00002149 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150}
2151
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002152PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153"S.upper() -> string\n\
2154\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002155Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002157#ifndef _toupper
2158#define _toupper toupper
2159#endif
2160
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002162string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002164 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002165 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002166 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002168 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002169 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171
2172 s = PyString_AS_STRING(newobj);
2173
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002174 memcpy(s, PyString_AS_STRING(self), n);
2175
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002177 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002178 if (islower(c))
2179 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002181
Anthony Baxtera6286212006-04-11 07:42:36 +00002182 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183}
2184
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002185PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186"S.title() -> string\n\
2187\n\
2188Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002189characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190
2191static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002192string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193{
2194 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002195 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002197 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002198
Anthony Baxtera6286212006-04-11 07:42:36 +00002199 newobj = PyString_FromStringAndSize(NULL, n);
2200 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002202 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 for (i = 0; i < n; i++) {
2204 int c = Py_CHARMASK(*s++);
2205 if (islower(c)) {
2206 if (!previous_is_cased)
2207 c = toupper(c);
2208 previous_is_cased = 1;
2209 } else if (isupper(c)) {
2210 if (previous_is_cased)
2211 c = tolower(c);
2212 previous_is_cased = 1;
2213 } else
2214 previous_is_cased = 0;
2215 *s_new++ = c;
2216 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002217 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002218}
2219
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002220PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221"S.capitalize() -> string\n\
2222\n\
2223Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002224capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225
2226static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002227string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228{
2229 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002230 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002231 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232
Anthony Baxtera6286212006-04-11 07:42:36 +00002233 newobj = PyString_FromStringAndSize(NULL, n);
2234 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002236 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237 if (0 < n) {
2238 int c = Py_CHARMASK(*s++);
2239 if (islower(c))
2240 *s_new = toupper(c);
2241 else
2242 *s_new = c;
2243 s_new++;
2244 }
2245 for (i = 1; i < n; i++) {
2246 int c = Py_CHARMASK(*s++);
2247 if (isupper(c))
2248 *s_new = tolower(c);
2249 else
2250 *s_new = c;
2251 s_new++;
2252 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002253 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254}
2255
2256
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002257PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258"S.count(sub[, start[, end]]) -> int\n\
2259\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002260Return the number of non-overlapping occurrences of substring sub in\n\
2261string S[start:end]. Optional arguments start and end are interpreted\n\
2262as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263
2264static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002265string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002266{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002267 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002268 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002269 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002270 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002271 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272
Guido van Rossumc6821402000-05-08 14:08:05 +00002273 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2274 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002276
Guido van Rossum4c08d552000-03-10 22:55:18 +00002277 if (PyString_Check(subobj)) {
2278 sub = PyString_AS_STRING(subobj);
2279 n = PyString_GET_SIZE(subobj);
2280 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002281#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002282 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002283 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002284 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2285 if (count == -1)
2286 return NULL;
2287 else
2288 return PyInt_FromLong((long) count);
2289 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002290#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002291 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2292 return NULL;
2293
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002294 string_adjust_indices(&i, &last, len);
2295
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296 m = last + 1 - n;
2297 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002298 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299
Fredrik Lundhaf722372006-05-25 17:55:31 +00002300 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2301 if (r < 0)
2302 r = 0; /* no match */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002303 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304}
2305
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002306PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307"S.swapcase() -> string\n\
2308\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002310converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311
2312static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002313string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314{
2315 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002316 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002317 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318
Anthony Baxtera6286212006-04-11 07:42:36 +00002319 newobj = PyString_FromStringAndSize(NULL, n);
2320 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002322 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323 for (i = 0; i < n; i++) {
2324 int c = Py_CHARMASK(*s++);
2325 if (islower(c)) {
2326 *s_new = toupper(c);
2327 }
2328 else if (isupper(c)) {
2329 *s_new = tolower(c);
2330 }
2331 else
2332 *s_new = c;
2333 s_new++;
2334 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002335 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336}
2337
2338
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002339PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340"S.translate(table [,deletechars]) -> string\n\
2341\n\
2342Return a copy of the string S, where all characters occurring\n\
2343in the optional argument deletechars are removed, and the\n\
2344remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002345translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346
2347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002348string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350 register char *input, *output;
2351 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002352 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002354 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002355 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356 PyObject *result;
2357 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002360 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002363
2364 if (PyString_Check(tableobj)) {
2365 table1 = PyString_AS_STRING(tableobj);
2366 tablen = PyString_GET_SIZE(tableobj);
2367 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002368#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002370 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 parameter; instead a mapping to None will cause characters
2372 to be deleted. */
2373 if (delobj != NULL) {
2374 PyErr_SetString(PyExc_TypeError,
2375 "deletions are implemented differently for unicode");
2376 return NULL;
2377 }
2378 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2379 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002380#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383
Martin v. Löwis00b61272002-12-12 20:03:19 +00002384 if (tablen != 256) {
2385 PyErr_SetString(PyExc_ValueError,
2386 "translation table must be 256 characters long");
2387 return NULL;
2388 }
2389
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 if (delobj != NULL) {
2391 if (PyString_Check(delobj)) {
2392 del_table = PyString_AS_STRING(delobj);
2393 dellen = PyString_GET_SIZE(delobj);
2394 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002395#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396 else if (PyUnicode_Check(delobj)) {
2397 PyErr_SetString(PyExc_TypeError,
2398 "deletions are implemented differently for unicode");
2399 return NULL;
2400 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002401#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2403 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 }
2405 else {
2406 del_table = NULL;
2407 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408 }
2409
2410 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002411 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412 result = PyString_FromStringAndSize((char *)NULL, inlen);
2413 if (result == NULL)
2414 return NULL;
2415 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002416 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417
2418 if (dellen == 0) {
2419 /* If no deletions are required, use faster code */
2420 for (i = inlen; --i >= 0; ) {
2421 c = Py_CHARMASK(*input++);
2422 if (Py_CHARMASK((*output++ = table[c])) != c)
2423 changed = 1;
2424 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002425 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 return result;
2427 Py_DECREF(result);
2428 Py_INCREF(input_obj);
2429 return input_obj;
2430 }
2431
2432 for (i = 0; i < 256; i++)
2433 trans_table[i] = Py_CHARMASK(table[i]);
2434
2435 for (i = 0; i < dellen; i++)
2436 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2437
2438 for (i = inlen; --i >= 0; ) {
2439 c = Py_CHARMASK(*input++);
2440 if (trans_table[c] != -1)
2441 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2442 continue;
2443 changed = 1;
2444 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002445 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 Py_DECREF(result);
2447 Py_INCREF(input_obj);
2448 return input_obj;
2449 }
2450 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002451 if (inlen > 0)
2452 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453 return result;
2454}
2455
2456
/* Search directions taken by findstring() and countstring(): those
   functions treat a positive value as "scan forward" and a negative value
   as "scan backward".  REVERSE is parenthesised so that it stays a single
   operand wherever the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL if there is none (thin wrapper
   around memchr).  Every argument is parenthesised in the expansion. */
#define findchar(target, target_len, c) \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2464
2465/* String ops must return a string. */
2466/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002467Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002468return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002470 if (PyString_CheckExact(self)) {
2471 Py_INCREF(self);
2472 return self;
2473 }
2474 return (PyStringObject *)PyString_FromStringAndSize(
2475 PyString_AS_STRING(self),
2476 PyString_GET_SIZE(self));
2477}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002479Py_LOCAL(Py_ssize_t)
Andrew Dalke51324072006-05-26 20:25:22 +00002480 countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002481{
2482 Py_ssize_t count=0;
2483 char *start=target;
2484 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002486 while ( (start=findchar(start, end-start, c)) != NULL ) {
2487 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002488 if (count >= maxcount)
2489 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002490 start += 1;
2491 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002492 return count;
2493}
2494
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002495Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002496findstring(char *target, Py_ssize_t target_len,
2497 char *pattern, Py_ssize_t pattern_len,
2498 Py_ssize_t start,
2499 Py_ssize_t end,
2500 int direction)
2501{
2502 if (start < 0) {
2503 start += target_len;
2504 if (start < 0)
2505 start = 0;
2506 }
2507 if (end > target_len) {
2508 end = target_len;
2509 } else if (end < 0) {
2510 end += target_len;
2511 if (end < 0)
2512 end = 0;
2513 }
2514
2515 /* zero-length substrings always match at the first attempt */
2516 if (pattern_len == 0)
2517 return (direction > 0) ? start : end;
2518
2519 end -= pattern_len;
2520
2521 if (direction < 0) {
2522 for (; end >= start; end--)
2523 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2524 return end;
2525 } else {
2526 for (; start <= end; start++)
2527 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2528 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529 }
2530 return -1;
2531}
2532
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002533Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002534countstring(char *target, Py_ssize_t target_len,
2535 char *pattern, Py_ssize_t pattern_len,
2536 Py_ssize_t start,
2537 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002538 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002540 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002541
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002542 if (start < 0) {
2543 start += target_len;
2544 if (start < 0)
2545 start = 0;
2546 }
2547 if (end > target_len) {
2548 end = target_len;
2549 } else if (end < 0) {
2550 end += target_len;
2551 if (end < 0)
2552 end = 0;
2553 }
2554
2555 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002556 if (pattern_len == 0 || maxcount == 0) {
2557 if (target_len+1 < maxcount)
2558 return target_len+1;
2559 return maxcount;
2560 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002561
2562 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002564 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002565 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2566 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002567 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568 end -= pattern_len-1;
2569 }
2570 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002571 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002572 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2573 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002574 if (--maxcount <= 0)
2575 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002576 start += pattern_len-1;
2577 }
2578 }
2579 return count;
2580}
2581
2582
/* Algorithms for different cases of string replacement */
2584
2585/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002586Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002587replace_interleave(PyStringObject *self,
2588 PyStringObject *to,
2589 Py_ssize_t maxcount)
2590{
2591 char *self_s, *to_s, *result_s;
2592 Py_ssize_t self_len, to_len, result_len;
2593 Py_ssize_t count, i, product;
2594 PyStringObject *result;
2595
2596 self_len = PyString_GET_SIZE(self);
2597 to_len = PyString_GET_SIZE(to);
2598
2599 /* 1 at the end plus 1 after every character */
2600 count = self_len+1;
2601 if (maxcount < count)
2602 count = maxcount;
2603
2604 /* Check for overflow */
2605 /* result_len = count * to_len + self_len; */
2606 product = count * to_len;
2607 if (product / to_len != count) {
2608 PyErr_SetString(PyExc_OverflowError,
2609 "replace string is too long");
2610 return NULL;
2611 }
2612 result_len = product + self_len;
2613 if (result_len < 0) {
2614 PyErr_SetString(PyExc_OverflowError,
2615 "replace string is too long");
2616 return NULL;
2617 }
2618
2619 if (! (result = (PyStringObject *)
2620 PyString_FromStringAndSize(NULL, result_len)) )
2621 return NULL;
2622
2623 self_s = PyString_AS_STRING(self);
2624 to_s = PyString_AS_STRING(to);
2625 to_len = PyString_GET_SIZE(to);
2626 result_s = PyString_AS_STRING(result);
2627
2628 /* TODO: special case single character, which doesn't need memcpy */
2629
2630 /* Lay the first one down (guaranteed this will occur) */
2631 memcpy(result_s, to_s, to_len);
2632 result_s += to_len;
2633 count -= 1;
2634
2635 for (i=0; i<count; i++) {
2636 *result_s++ = *self_s++;
2637 memcpy(result_s, to_s, to_len);
2638 result_s += to_len;
2639 }
2640
2641 /* Copy the rest of the original string */
2642 memcpy(result_s, self_s, self_len-i);
2643
2644 return result;
2645}
2646
2647/* Special case for deleting a single character */
2648/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002649Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002650replace_delete_single_character(PyStringObject *self,
2651 char from_c, Py_ssize_t maxcount)
2652{
2653 char *self_s, *result_s;
2654 char *start, *next, *end;
2655 Py_ssize_t self_len, result_len;
2656 Py_ssize_t count;
2657 PyStringObject *result;
2658
2659 self_len = PyString_GET_SIZE(self);
2660 self_s = PyString_AS_STRING(self);
2661
Andrew Dalke51324072006-05-26 20:25:22 +00002662 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002663 if (count == 0) {
2664 return return_self(self);
2665 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666
2667 result_len = self_len - count; /* from_len == 1 */
2668 assert(result_len>=0);
2669
2670 if ( (result = (PyStringObject *)
2671 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2672 return NULL;
2673 result_s = PyString_AS_STRING(result);
2674
2675 start = self_s;
2676 end = self_s + self_len;
2677 while (count-- > 0) {
2678 next = findchar(start, end-start, from_c);
2679 if (next == NULL)
2680 break;
2681 memcpy(result_s, start, next-start);
2682 result_s += (next-start);
2683 start = next+1;
2684 }
2685 memcpy(result_s, start, end-start);
2686
2687 return result;
2688}
2689
2690/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2691
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002692Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002693replace_delete_substring(PyStringObject *self, PyStringObject *from,
2694 Py_ssize_t maxcount) {
2695 char *self_s, *from_s, *result_s;
2696 char *start, *next, *end;
2697 Py_ssize_t self_len, from_len, result_len;
2698 Py_ssize_t count, offset;
2699 PyStringObject *result;
2700
2701 self_len = PyString_GET_SIZE(self);
2702 self_s = PyString_AS_STRING(self);
2703 from_len = PyString_GET_SIZE(from);
2704 from_s = PyString_AS_STRING(from);
2705
2706 count = countstring(self_s, self_len,
2707 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002708 0, self_len, 1,
2709 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002710
2711 if (count == 0) {
2712 /* no matches */
2713 return return_self(self);
2714 }
2715
2716 result_len = self_len - (count * from_len);
2717 assert (result_len>=0);
2718
2719 if ( (result = (PyStringObject *)
2720 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2721 return NULL;
2722
2723 result_s = PyString_AS_STRING(result);
2724
2725 start = self_s;
2726 end = self_s + self_len;
2727 while (count-- > 0) {
2728 offset = findstring(start, end-start,
2729 from_s, from_len,
2730 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002731 if (offset == -1)
2732 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002733 next = start + offset;
2734
2735 memcpy(result_s, start, next-start);
2736
2737 result_s += (next-start);
2738 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002739 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002740 memcpy(result_s, start, end-start);
2741 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002742}
2743
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002745Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002746replace_single_character_in_place(PyStringObject *self,
2747 char from_c, char to_c,
2748 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002749{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002750 char *self_s, *result_s, *start, *end, *next;
2751 Py_ssize_t self_len;
2752 PyStringObject *result;
2753
2754 /* The result string will be the same size */
2755 self_s = PyString_AS_STRING(self);
2756 self_len = PyString_GET_SIZE(self);
2757
2758 next = findchar(self_s, self_len, from_c);
2759
2760 if (next == NULL) {
2761 /* No matches; return the original string */
2762 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002763 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002764
2765 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002766 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002767 if (result == NULL)
2768 return NULL;
2769 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002770 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771
2772 /* change everything in-place, starting with this one */
2773 start = result_s + (next-self_s);
2774 *start = to_c;
2775 start++;
2776 end = result_s + self_len;
2777
2778 while (--maxcount > 0) {
2779 next = findchar(start, end-start, from_c);
2780 if (next == NULL)
2781 break;
2782 *next = to_c;
2783 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002784 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785
2786 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002787}
2788
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002789/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002790Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791replace_substring_in_place(PyStringObject *self,
2792 PyStringObject *from,
2793 PyStringObject *to,
2794 Py_ssize_t maxcount)
2795{
2796 char *result_s, *start, *end;
2797 char *self_s, *from_s, *to_s;
2798 Py_ssize_t self_len, from_len, offset;
2799 PyStringObject *result;
2800
2801 /* The result string will be the same size */
2802
2803 self_s = PyString_AS_STRING(self);
2804 self_len = PyString_GET_SIZE(self);
2805
2806 from_s = PyString_AS_STRING(from);
2807 from_len = PyString_GET_SIZE(from);
2808 to_s = PyString_AS_STRING(to);
2809
2810 offset = findstring(self_s, self_len,
2811 from_s, from_len,
2812 0, self_len, FORWARD);
2813
2814 if (offset == -1) {
2815 /* No matches; return the original string */
2816 return return_self(self);
2817 }
2818
2819 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002820 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821 if (result == NULL)
2822 return NULL;
2823 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002824 memcpy(result_s, self_s, self_len);
2825
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002826
2827 /* change everything in-place, starting with this one */
2828 start = result_s + offset;
2829 memcpy(start, to_s, from_len);
2830 start += from_len;
2831 end = result_s + self_len;
2832
2833 while ( --maxcount > 0) {
2834 offset = findstring(start, end-start,
2835 from_s, from_len,
2836 0, end-start, FORWARD);
2837 if (offset==-1)
2838 break;
2839 memcpy(start+offset, to_s, from_len);
2840 start += offset+from_len;
2841 }
2842
2843 return result;
2844}
2845
2846/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002847Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002848replace_single_character(PyStringObject *self,
2849 char from_c,
2850 PyStringObject *to,
2851 Py_ssize_t maxcount)
2852{
2853 char *self_s, *to_s, *result_s;
2854 char *start, *next, *end;
2855 Py_ssize_t self_len, to_len, result_len;
2856 Py_ssize_t count, product;
2857 PyStringObject *result;
2858
2859 self_s = PyString_AS_STRING(self);
2860 self_len = PyString_GET_SIZE(self);
2861
Andrew Dalke51324072006-05-26 20:25:22 +00002862 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002863
2864 if (count == 0) {
2865 /* no matches, return unchanged */
2866 return return_self(self);
2867 }
2868
2869 to_s = PyString_AS_STRING(to);
2870 to_len = PyString_GET_SIZE(to);
2871
2872 /* use the difference between current and new, hence the "-1" */
2873 /* result_len = self_len + count * (to_len-1) */
2874 product = count * (to_len-1);
2875 if (product / (to_len-1) != count) {
2876 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2877 return NULL;
2878 }
2879 result_len = self_len + product;
2880 if (result_len < 0) {
2881 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2882 return NULL;
2883 }
2884
2885 if ( (result = (PyStringObject *)
2886 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2887 return NULL;
2888 result_s = PyString_AS_STRING(result);
2889
2890 start = self_s;
2891 end = self_s + self_len;
2892 while (count-- > 0) {
2893 next = findchar(start, end-start, from_c);
2894 if (next == NULL)
2895 break;
2896
2897 if (next == start) {
2898 /* replace with the 'to' */
2899 memcpy(result_s, to_s, to_len);
2900 result_s += to_len;
2901 start += 1;
2902 } else {
2903 /* copy the unchanged old then the 'to' */
2904 memcpy(result_s, start, next-start);
2905 result_s += (next-start);
2906 memcpy(result_s, to_s, to_len);
2907 result_s += to_len;
2908 start = next+1;
2909 }
2910 }
2911 /* Copy the remainder of the remaining string */
2912 memcpy(result_s, start, end-start);
2913
2914 return result;
2915}
2916
2917/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002918Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919replace_substring(PyStringObject *self,
2920 PyStringObject *from,
2921 PyStringObject *to,
2922 Py_ssize_t maxcount) {
2923 char *self_s, *from_s, *to_s, *result_s;
2924 char *start, *next, *end;
2925 Py_ssize_t self_len, from_len, to_len, result_len;
2926 Py_ssize_t count, offset, product;
2927 PyStringObject *result;
2928
2929 self_s = PyString_AS_STRING(self);
2930 self_len = PyString_GET_SIZE(self);
2931 from_s = PyString_AS_STRING(from);
2932 from_len = PyString_GET_SIZE(from);
2933
2934 count = countstring(self_s, self_len,
2935 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002936 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002937 if (count == 0) {
2938 /* no matches, return unchanged */
2939 return return_self(self);
2940 }
2941
2942 to_s = PyString_AS_STRING(to);
2943 to_len = PyString_GET_SIZE(to);
2944
2945 /* Check for overflow */
2946 /* result_len = self_len + count * (to_len-from_len) */
2947 product = count * (to_len-from_len);
2948 if (product / (to_len-from_len) != count) {
2949 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2950 return NULL;
2951 }
2952 result_len = self_len + product;
2953 if (result_len < 0) {
2954 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2955 return NULL;
2956 }
2957
2958 if ( (result = (PyStringObject *)
2959 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2960 return NULL;
2961 result_s = PyString_AS_STRING(result);
2962
2963 start = self_s;
2964 end = self_s + self_len;
2965 while (count-- > 0) {
2966 offset = findstring(start, end-start,
2967 from_s, from_len,
2968 0, end-start, FORWARD);
2969 if (offset == -1)
2970 break;
2971 next = start+offset;
2972 if (next == start) {
2973 /* replace with the 'to' */
2974 memcpy(result_s, to_s, to_len);
2975 result_s += to_len;
2976 start += from_len;
2977 } else {
2978 /* copy the unchanged old then the 'to' */
2979 memcpy(result_s, start, next-start);
2980 result_s += (next-start);
2981 memcpy(result_s, to_s, to_len);
2982 result_s += to_len;
2983 start = next+from_len;
2984 }
2985 }
2986 /* Copy the remainder of the remaining string */
2987 memcpy(result_s, start, end-start);
2988
2989 return result;
2990}
2991
2992
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002993Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002994replace(PyStringObject *self,
2995 PyStringObject *from,
2996 PyStringObject *to,
2997 Py_ssize_t maxcount)
2998{
2999 Py_ssize_t from_len, to_len;
3000
3001 if (maxcount < 0) {
3002 maxcount = PY_SSIZE_T_MAX;
3003 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3004 /* nothing to do; return the original string */
3005 return return_self(self);
3006 }
3007
3008 from_len = PyString_GET_SIZE(from);
3009 to_len = PyString_GET_SIZE(to);
3010
3011 if (maxcount == 0 ||
3012 (from_len == 0 && to_len == 0)) {
3013 /* nothing to do; return the original string */
3014 return return_self(self);
3015 }
3016
3017 /* Handle zero-length special cases */
3018
3019 if (from_len == 0) {
3020 /* insert the 'to' string everywhere. */
3021 /* >>> "Python".replace("", ".") */
3022 /* '.P.y.t.h.o.n.' */
3023 return replace_interleave(self, to, maxcount);
3024 }
3025
3026 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3027 /* point for an empty self string to generate a non-empty string */
3028 /* Special case so the remaining code always gets a non-empty string */
3029 if (PyString_GET_SIZE(self) == 0) {
3030 return return_self(self);
3031 }
3032
3033 if (to_len == 0) {
3034 /* delete all occurances of 'from' string */
3035 if (from_len == 1) {
3036 return replace_delete_single_character(
3037 self, PyString_AS_STRING(from)[0], maxcount);
3038 } else {
3039 return replace_delete_substring(self, from, maxcount);
3040 }
3041 }
3042
3043 /* Handle special case where both strings have the same length */
3044
3045 if (from_len == to_len) {
3046 if (from_len == 1) {
3047 return replace_single_character_in_place(
3048 self,
3049 PyString_AS_STRING(from)[0],
3050 PyString_AS_STRING(to)[0],
3051 maxcount);
3052 } else {
3053 return replace_substring_in_place(
3054 self, from, to, maxcount);
3055 }
3056 }
3057
3058 /* Otherwise use the more generic algorithms */
3059 if (from_len == 1) {
3060 return replace_single_character(self, PyString_AS_STRING(from)[0],
3061 to, maxcount);
3062 } else {
3063 /* len('from')>=2, len('to')>=1 */
3064 return replace_substring(self, from, to, maxcount);
3065 }
3066}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003068PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003069"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003070\n\
3071Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003072old replaced by new. If the optional argument count is\n\
3073given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003074
3075static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003076string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003077{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003078 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003079 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003080 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003081 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003082
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003083 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003084 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003085
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003086 if (PyString_Check(from)) {
3087 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003088 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003089#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003090 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003091 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003092 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003093#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003094 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095 return NULL;
3096
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003097 if (PyString_Check(to)) {
3098 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003099 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003100#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003101 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003102 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003103 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003104#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003105 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003106 return NULL;
3107
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003108 return (PyObject *)replace((PyStringObject *) self,
3109 (PyStringObject *) from,
3110 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003111}
3112
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003113/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003114
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003115PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003116"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003117\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003118Return True if S starts with the specified prefix, False otherwise.\n\
3119With optional start, test S beginning at that position.\n\
3120With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003121
3122static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003123string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003124{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003126 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003127 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003128 Py_ssize_t plen;
3129 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003130 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003131 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003132
Guido van Rossumc6821402000-05-08 14:08:05 +00003133 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3134 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003135 return NULL;
3136 if (PyString_Check(subobj)) {
3137 prefix = PyString_AS_STRING(subobj);
3138 plen = PyString_GET_SIZE(subobj);
3139 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003140#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003141 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003142 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003143 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003144 subobj, start, end, -1);
3145 if (rc == -1)
3146 return NULL;
3147 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003148 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003149 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003150#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152 return NULL;
3153
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003154 string_adjust_indices(&start, &end, len);
3155
3156 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003157 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003159 if (end-start >= plen)
3160 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3161 else
3162 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003163}
3164
3165
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003166PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003167"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003168\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003169Return True if S ends with the specified suffix, False otherwise.\n\
3170With optional start, test S beginning at that position.\n\
3171With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172
3173static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003174string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003175{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003177 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003178 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003179 Py_ssize_t slen;
3180 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003181 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003183
Guido van Rossumc6821402000-05-08 14:08:05 +00003184 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3185 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 return NULL;
3187 if (PyString_Check(subobj)) {
3188 suffix = PyString_AS_STRING(subobj);
3189 slen = PyString_GET_SIZE(subobj);
3190 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003191#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003192 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003193 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003194 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003195 subobj, start, end, +1);
3196 if (rc == -1)
3197 return NULL;
3198 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003199 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003200 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003201#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003202 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003203 return NULL;
3204
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003205 string_adjust_indices(&start, &end, len);
3206
3207 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003208 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003209
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003210 if (end-slen > start)
3211 start = end - slen;
3212 if (end-start >= slen)
3213 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3214 else
3215 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216}
3217
3218
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003219PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003220"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003221\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003222Encodes S using the codec registered for encoding. encoding defaults\n\
3223to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003224handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003225a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3226'xmlcharrefreplace' as well as any other name registered with\n\
3227codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003228
3229static PyObject *
3230string_encode(PyStringObject *self, PyObject *args)
3231{
3232 char *encoding = NULL;
3233 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003234 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003235
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003236 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3237 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003238 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003239 if (v == NULL)
3240 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003241 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3242 PyErr_Format(PyExc_TypeError,
3243 "encoder did not return a string/unicode object "
3244 "(type=%.400s)",
3245 v->ob_type->tp_name);
3246 Py_DECREF(v);
3247 return NULL;
3248 }
3249 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003250
3251 onError:
3252 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003253}
3254
3255
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003256PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003257"S.decode([encoding[,errors]]) -> object\n\
3258\n\
3259Decodes S using the codec registered for encoding. encoding defaults\n\
3260to the default encoding. errors may be given to set a different error\n\
3261handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003262a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3263as well as any other name registerd with codecs.register_error that is\n\
3264able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003265
3266static PyObject *
3267string_decode(PyStringObject *self, PyObject *args)
3268{
3269 char *encoding = NULL;
3270 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003271 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003272
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003273 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3274 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003275 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003276 if (v == NULL)
3277 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003278 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3279 PyErr_Format(PyExc_TypeError,
3280 "decoder did not return a string/unicode object "
3281 "(type=%.400s)",
3282 v->ob_type->tp_name);
3283 Py_DECREF(v);
3284 return NULL;
3285 }
3286 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003287
3288 onError:
3289 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003290}
3291
3292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003293PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003294"S.expandtabs([tabsize]) -> string\n\
3295\n\
3296Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003297If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003298
3299static PyObject*
3300string_expandtabs(PyStringObject *self, PyObject *args)
3301{
3302 const char *e, *p;
3303 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003304 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003305 PyObject *u;
3306 int tabsize = 8;
3307
3308 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3309 return NULL;
3310
Thomas Wouters7e474022000-07-16 12:04:32 +00003311 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003312 i = j = 0;
3313 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3314 for (p = PyString_AS_STRING(self); p < e; p++)
3315 if (*p == '\t') {
3316 if (tabsize > 0)
3317 j += tabsize - (j % tabsize);
3318 }
3319 else {
3320 j++;
3321 if (*p == '\n' || *p == '\r') {
3322 i += j;
3323 j = 0;
3324 }
3325 }
3326
3327 /* Second pass: create output string and fill it */
3328 u = PyString_FromStringAndSize(NULL, i + j);
3329 if (!u)
3330 return NULL;
3331
3332 j = 0;
3333 q = PyString_AS_STRING(u);
3334
3335 for (p = PyString_AS_STRING(self); p < e; p++)
3336 if (*p == '\t') {
3337 if (tabsize > 0) {
3338 i = tabsize - (j % tabsize);
3339 j += i;
3340 while (i--)
3341 *q++ = ' ';
3342 }
3343 }
3344 else {
3345 j++;
3346 *q++ = *p;
3347 if (*p == '\n' || *p == '\r')
3348 j = 0;
3349 }
3350
3351 return u;
3352}
3353
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003354Py_LOCAL(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003355pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003356{
3357 PyObject *u;
3358
3359 if (left < 0)
3360 left = 0;
3361 if (right < 0)
3362 right = 0;
3363
Tim Peters8fa5dd02001-09-12 02:18:30 +00003364 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003365 Py_INCREF(self);
3366 return (PyObject *)self;
3367 }
3368
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003369 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003370 left + PyString_GET_SIZE(self) + right);
3371 if (u) {
3372 if (left)
3373 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003374 memcpy(PyString_AS_STRING(u) + left,
3375 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003376 PyString_GET_SIZE(self));
3377 if (right)
3378 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3379 fill, right);
3380 }
3381
3382 return u;
3383}
3384
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003385PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003386"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003387"\n"
3388"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003389"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003390
3391static PyObject *
3392string_ljust(PyStringObject *self, PyObject *args)
3393{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003394 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003395 char fillchar = ' ';
3396
Thomas Wouters4abb3662006-04-19 14:50:15 +00003397 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398 return NULL;
3399
Tim Peters8fa5dd02001-09-12 02:18:30 +00003400 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003401 Py_INCREF(self);
3402 return (PyObject*) self;
3403 }
3404
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003405 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406}
3407
3408
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003409PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003410"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003411"\n"
3412"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003413"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414
3415static PyObject *
3416string_rjust(PyStringObject *self, PyObject *args)
3417{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003418 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003419 char fillchar = ' ';
3420
Thomas Wouters4abb3662006-04-19 14:50:15 +00003421 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422 return NULL;
3423
Tim Peters8fa5dd02001-09-12 02:18:30 +00003424 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425 Py_INCREF(self);
3426 return (PyObject*) self;
3427 }
3428
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430}
3431
3432
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003433PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003434"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003435"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003436"Return S centered in a string of length width. Padding is\n"
3437"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438
3439static PyObject *
3440string_center(PyStringObject *self, PyObject *args)
3441{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003442 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003443 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003444 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445
Thomas Wouters4abb3662006-04-19 14:50:15 +00003446 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003447 return NULL;
3448
Tim Peters8fa5dd02001-09-12 02:18:30 +00003449 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003450 Py_INCREF(self);
3451 return (PyObject*) self;
3452 }
3453
3454 marg = width - PyString_GET_SIZE(self);
3455 left = marg / 2 + (marg & width & 1);
3456
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003457 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458}
3459
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003460PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003461"S.zfill(width) -> string\n"
3462"\n"
3463"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003464"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003465
3466static PyObject *
3467string_zfill(PyStringObject *self, PyObject *args)
3468{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003469 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003470 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003471 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003472 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003473
Thomas Wouters4abb3662006-04-19 14:50:15 +00003474 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003475 return NULL;
3476
3477 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003478 if (PyString_CheckExact(self)) {
3479 Py_INCREF(self);
3480 return (PyObject*) self;
3481 }
3482 else
3483 return PyString_FromStringAndSize(
3484 PyString_AS_STRING(self),
3485 PyString_GET_SIZE(self)
3486 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003487 }
3488
3489 fill = width - PyString_GET_SIZE(self);
3490
3491 s = pad(self, fill, 0, '0');
3492
3493 if (s == NULL)
3494 return NULL;
3495
3496 p = PyString_AS_STRING(s);
3497 if (p[fill] == '+' || p[fill] == '-') {
3498 /* move sign to beginning of string */
3499 p[0] = p[fill];
3500 p[fill] = '0';
3501 }
3502
3503 return (PyObject*) s;
3504}
3505
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003506PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003507"S.isspace() -> bool\n\
3508\n\
3509Return True if all characters in S are whitespace\n\
3510and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003511
3512static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003513string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003514{
Fred Drakeba096332000-07-09 07:04:36 +00003515 register const unsigned char *p
3516 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003517 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518
Guido van Rossum4c08d552000-03-10 22:55:18 +00003519 /* Shortcut for single character strings */
3520 if (PyString_GET_SIZE(self) == 1 &&
3521 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003522 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003523
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003524 /* Special case for empty strings */
3525 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003526 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003527
Guido van Rossum4c08d552000-03-10 22:55:18 +00003528 e = p + PyString_GET_SIZE(self);
3529 for (; p < e; p++) {
3530 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003531 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003532 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003533 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534}
3535
3536
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003537PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003539\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003540Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003541and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003542
3543static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003544string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003545{
Fred Drakeba096332000-07-09 07:04:36 +00003546 register const unsigned char *p
3547 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003548 register const unsigned char *e;
3549
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550 /* Shortcut for single character strings */
3551 if (PyString_GET_SIZE(self) == 1 &&
3552 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003553 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003554
3555 /* Special case for empty strings */
3556 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003557 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558
3559 e = p + PyString_GET_SIZE(self);
3560 for (; p < e; p++) {
3561 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003562 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003563 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003564 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565}
3566
3567
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003568PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003571Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003572and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573
3574static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003575string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576{
Fred Drakeba096332000-07-09 07:04:36 +00003577 register const unsigned char *p
3578 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579 register const unsigned char *e;
3580
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581 /* Shortcut for single character strings */
3582 if (PyString_GET_SIZE(self) == 1 &&
3583 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003584 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003585
3586 /* Special case for empty strings */
3587 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589
3590 e = p + PyString_GET_SIZE(self);
3591 for (; p < e; p++) {
3592 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003595 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003596}
3597
3598
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003599PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003601\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003602Return True if all characters in S are digits\n\
3603and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003604
3605static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003606string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607{
Fred Drakeba096332000-07-09 07:04:36 +00003608 register const unsigned char *p
3609 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003610 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003611
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612 /* Shortcut for single character strings */
3613 if (PyString_GET_SIZE(self) == 1 &&
3614 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003615 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003616
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003617 /* Special case for empty strings */
3618 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003620
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621 e = p + PyString_GET_SIZE(self);
3622 for (; p < e; p++) {
3623 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003626 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627}
3628
3629
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003630PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003634at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635
3636static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003637string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638{
Fred Drakeba096332000-07-09 07:04:36 +00003639 register const unsigned char *p
3640 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003641 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642 int cased;
3643
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644 /* Shortcut for single character strings */
3645 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003646 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003648 /* Special case for empty strings */
3649 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003651
Guido van Rossum4c08d552000-03-10 22:55:18 +00003652 e = p + PyString_GET_SIZE(self);
3653 cased = 0;
3654 for (; p < e; p++) {
3655 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657 else if (!cased && islower(*p))
3658 cased = 1;
3659 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661}
3662
3663
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003664PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003667Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003668at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669
3670static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003671string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672{
Fred Drakeba096332000-07-09 07:04:36 +00003673 register const unsigned char *p
3674 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003675 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 int cased;
3677
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678 /* Shortcut for single character strings */
3679 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003682 /* Special case for empty strings */
3683 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003685
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686 e = p + PyString_GET_SIZE(self);
3687 cased = 0;
3688 for (; p < e; p++) {
3689 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003690 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691 else if (!cased && isupper(*p))
3692 cased = 1;
3693 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695}
3696
3697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003698PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003701Return True if S is a titlecased string and there is at least one\n\
3702character in S, i.e. uppercase characters may only follow uncased\n\
3703characters and lowercase characters only cased ones. Return False\n\
3704otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705
3706static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003707string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708{
Fred Drakeba096332000-07-09 07:04:36 +00003709 register const unsigned char *p
3710 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003711 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 int cased, previous_is_cased;
3713
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714 /* Shortcut for single character strings */
3715 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003718 /* Special case for empty strings */
3719 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003721
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722 e = p + PyString_GET_SIZE(self);
3723 cased = 0;
3724 previous_is_cased = 0;
3725 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003726 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003727
3728 if (isupper(ch)) {
3729 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003730 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 previous_is_cased = 1;
3732 cased = 1;
3733 }
3734 else if (islower(ch)) {
3735 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737 previous_is_cased = 1;
3738 cased = 1;
3739 }
3740 else
3741 previous_is_cased = 0;
3742 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003743 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003744}
3745
3746
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003747PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003748"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003749\n\
3750Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003751Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003752is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754static PyObject*
3755string_splitlines(PyStringObject *self, PyObject *args)
3756{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003757 register Py_ssize_t i;
3758 register Py_ssize_t j;
3759 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003760 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761 PyObject *list;
3762 PyObject *str;
3763 char *data;
3764
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003765 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766 return NULL;
3767
3768 data = PyString_AS_STRING(self);
3769 len = PyString_GET_SIZE(self);
3770
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003771 /* This does not use the preallocated list because splitlines is
3772 usually run with hundreds of newlines. The overhead of
3773 switching between PyList_SET_ITEM and append causes about a
3774 2-3% slowdown for that common case. A smarter implementation
3775 could move the if check out, so the SET_ITEMs are done first
3776 and the appends only done when the prealloc buffer is full.
3777 That's too much work for little gain.*/
3778
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779 list = PyList_New(0);
3780 if (!list)
3781 goto onError;
3782
3783 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003784 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003785
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 /* Find a line and append it */
3787 while (i < len && data[i] != '\n' && data[i] != '\r')
3788 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789
3790 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003791 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792 if (i < len) {
3793 if (data[i] == '\r' && i + 1 < len &&
3794 data[i+1] == '\n')
3795 i += 2;
3796 else
3797 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003798 if (keepends)
3799 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003801 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003802 j = i;
3803 }
3804 if (j < len) {
3805 SPLIT_APPEND(data, j, len);
3806 }
3807
3808 return list;
3809
3810 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003811 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003812 return NULL;
3813}
3814
3815#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003816#undef SPLIT_ADD
3817#undef MAX_PREALLOC
3818#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003819
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003820static PyObject *
3821string_getnewargs(PyStringObject *v)
3822{
3823 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3824}
3825
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003826
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003827static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003828string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003829 /* Counterparts of the obsolete stropmodule functions; except
3830 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003831 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3832 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003833 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003834 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3835 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003836 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3837 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3838 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3839 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3840 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3841 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3842 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003843 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3844 capitalize__doc__},
3845 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3846 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3847 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003848 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003849 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3850 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3851 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3852 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3853 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3854 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3855 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003856 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3857 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003858 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3859 startswith__doc__},
3860 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3861 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3862 swapcase__doc__},
3863 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3864 translate__doc__},
3865 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3866 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3867 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3868 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3869 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3870 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3871 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3872 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3873 expandtabs__doc__},
3874 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3875 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003876 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003877 {NULL, NULL} /* sentinel */
3878};
3879
Jeremy Hylton938ace62002-07-17 16:30:39 +00003880static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003881str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3882
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003883static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003884string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003885{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003886 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003887 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003888
Guido van Rossumae960af2001-08-30 03:11:59 +00003889 if (type != &PyString_Type)
3890 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003891 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3892 return NULL;
3893 if (x == NULL)
3894 return PyString_FromString("");
3895 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003896}
3897
Guido van Rossumae960af2001-08-30 03:11:59 +00003898static PyObject *
3899str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3900{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003901 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003902 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003903
3904 assert(PyType_IsSubtype(type, &PyString_Type));
3905 tmp = string_new(&PyString_Type, args, kwds);
3906 if (tmp == NULL)
3907 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003908 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003909 n = PyString_GET_SIZE(tmp);
3910 pnew = type->tp_alloc(type, n);
3911 if (pnew != NULL) {
3912 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003913 ((PyStringObject *)pnew)->ob_shash =
3914 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003915 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003916 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003917 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003918 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003919}
3920
Guido van Rossumcacfc072002-05-24 19:01:59 +00003921static PyObject *
3922basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3923{
3924 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003925 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003926 return NULL;
3927}
3928
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003929static PyObject *
3930string_mod(PyObject *v, PyObject *w)
3931{
3932 if (!PyString_Check(v)) {
3933 Py_INCREF(Py_NotImplemented);
3934 return Py_NotImplemented;
3935 }
3936 return PyString_Format(v, w);
3937}
3938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003939PyDoc_STRVAR(basestring_doc,
3940"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003941
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003942static PyNumberMethods string_as_number = {
3943 0, /*nb_add*/
3944 0, /*nb_subtract*/
3945 0, /*nb_multiply*/
3946 0, /*nb_divide*/
3947 string_mod, /*nb_remainder*/
3948};
3949
3950
Guido van Rossumcacfc072002-05-24 19:01:59 +00003951PyTypeObject PyBaseString_Type = {
3952 PyObject_HEAD_INIT(&PyType_Type)
3953 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003954 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003955 0,
3956 0,
3957 0, /* tp_dealloc */
3958 0, /* tp_print */
3959 0, /* tp_getattr */
3960 0, /* tp_setattr */
3961 0, /* tp_compare */
3962 0, /* tp_repr */
3963 0, /* tp_as_number */
3964 0, /* tp_as_sequence */
3965 0, /* tp_as_mapping */
3966 0, /* tp_hash */
3967 0, /* tp_call */
3968 0, /* tp_str */
3969 0, /* tp_getattro */
3970 0, /* tp_setattro */
3971 0, /* tp_as_buffer */
3972 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3973 basestring_doc, /* tp_doc */
3974 0, /* tp_traverse */
3975 0, /* tp_clear */
3976 0, /* tp_richcompare */
3977 0, /* tp_weaklistoffset */
3978 0, /* tp_iter */
3979 0, /* tp_iternext */
3980 0, /* tp_methods */
3981 0, /* tp_members */
3982 0, /* tp_getset */
3983 &PyBaseObject_Type, /* tp_base */
3984 0, /* tp_dict */
3985 0, /* tp_descr_get */
3986 0, /* tp_descr_set */
3987 0, /* tp_dictoffset */
3988 0, /* tp_init */
3989 0, /* tp_alloc */
3990 basestring_new, /* tp_new */
3991 0, /* tp_free */
3992};
3993
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003994PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003995"str(object) -> string\n\
3996\n\
3997Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003998If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003999
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004000PyTypeObject PyString_Type = {
4001 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004002 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004003 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004004 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004005 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004006 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004007 (printfunc)string_print, /* tp_print */
4008 0, /* tp_getattr */
4009 0, /* tp_setattr */
4010 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004011 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004012 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004013 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004014 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004015 (hashfunc)string_hash, /* tp_hash */
4016 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004017 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004018 PyObject_GenericGetAttr, /* tp_getattro */
4019 0, /* tp_setattro */
4020 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004021 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004022 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004023 string_doc, /* tp_doc */
4024 0, /* tp_traverse */
4025 0, /* tp_clear */
4026 (richcmpfunc)string_richcompare, /* tp_richcompare */
4027 0, /* tp_weaklistoffset */
4028 0, /* tp_iter */
4029 0, /* tp_iternext */
4030 string_methods, /* tp_methods */
4031 0, /* tp_members */
4032 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004033 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004034 0, /* tp_dict */
4035 0, /* tp_descr_get */
4036 0, /* tp_descr_set */
4037 0, /* tp_dictoffset */
4038 0, /* tp_init */
4039 0, /* tp_alloc */
4040 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004041 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004042};
4043
4044void
Fred Drakeba096332000-07-09 07:04:36 +00004045PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004046{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004047 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004048 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004049 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004050 if (w == NULL || !PyString_Check(*pv)) {
4051 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004052 *pv = NULL;
4053 return;
4054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004055 v = string_concat((PyStringObject *) *pv, w);
4056 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004057 *pv = v;
4058}
4059
Guido van Rossum013142a1994-08-30 08:19:36 +00004060void
Fred Drakeba096332000-07-09 07:04:36 +00004061PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004062{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004063 PyString_Concat(pv, w);
4064 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004065}
4066
4067
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004068/* The following function breaks the notion that strings are immutable:
4069 it changes the size of a string. We get away with this only if there
4070 is only one module referencing the object. You can also think of it
4071 as creating a new string object and destroying the old one, only
4072 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004073 already be known to some other part of the code...
4074 Note that if there's not enough memory to resize the string, the original
4075 string object at *pv is deallocated, *pv is set to NULL, an "out of
4076 memory" exception is set, and -1 is returned. Else (on success) 0 is
4077 returned, and the value in *pv may or may not be the same as on input.
4078 As always, an extra byte is allocated for a trailing \0 byte (newsize
4079 does *not* include that), and a trailing \0 byte is stored.
4080*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004081
4082int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004083_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004084{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004085 register PyObject *v;
4086 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004087 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004088 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4089 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004090 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004091 Py_DECREF(v);
4092 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004093 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004094 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004095 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004096 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004097 _Py_ForgetReference(v);
4098 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004099 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004100 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004101 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004102 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004103 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004104 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004105 _Py_NewReference(*pv);
4106 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004107 sv->ob_size = newsize;
4108 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004109 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004110 return 0;
4111}
Guido van Rossume5372401993-03-16 12:15:04 +00004112
4113/* Helpers for formatstring */
4114
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004115Py_LOCAL(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004116getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004117{
Thomas Wouters977485d2006-02-16 15:59:12 +00004118 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004119 if (argidx < arglen) {
4120 (*p_argidx)++;
4121 if (arglen < 0)
4122 return args;
4123 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004124 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004125 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004126 PyErr_SetString(PyExc_TypeError,
4127 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004128 return NULL;
4129}
4130
/* Format flag bits, one per flag character of a conversion
 * specification.  They are combined into a single `flags` bitmask.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4143
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004144Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004145formatfloat(char *buf, size_t buflen, int flags,
4146 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004147{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004148 /* fmt = '%#.' + `prec` + `type`
4149 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004150 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004151 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004152 x = PyFloat_AsDouble(v);
4153 if (x == -1.0 && PyErr_Occurred()) {
4154 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004155 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004156 }
Guido van Rossume5372401993-03-16 12:15:04 +00004157 if (prec < 0)
4158 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004159 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4160 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004161 /* Worst case length calc to ensure no buffer overrun:
4162
4163 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004164 fmt = %#.<prec>g
4165 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004166 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004167 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004168
4169 'f' formats:
4170 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4171 len = 1 + 50 + 1 + prec = 52 + prec
4172
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004173 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004174 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004175
4176 */
4177 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4178 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004179 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004180 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004181 return -1;
4182 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004183 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4184 (flags&F_ALT) ? "#" : "",
4185 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004186 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004187 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004188}
4189
Tim Peters38fd5b62000-09-21 05:43:11 +00004190/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4191 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4192 * Python's regular ints.
4193 * Return value: a new PyString*, or NULL if error.
4194 * . *pbuf is set to point into it,
4195 * *plen set to the # of chars following that.
4196 * Caller must decref it when done using pbuf.
4197 * The string starting at *pbuf is of the form
4198 * "-"? ("0x" | "0X")? digit+
4199 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004200 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004201 * There will be at least prec digits, zero-filled on the left if
4202 * necessary to get that many.
4203 * val object to be converted
4204 * flags bitmask of format flags; only F_ALT is looked at
4205 * prec minimum number of digits; 0-fill on left if needed
4206 * type a character in [duoxX]; u acts the same as d
4207 *
4208 * CAUTION: o, x and X conversions on regular ints can never
4209 * produce a '-' sign, but can for Python's unbounded ints.
4210 */
4211PyObject*
4212_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4213 char **pbuf, int *plen)
4214{
4215 PyObject *result = NULL;
4216 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004217 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004218 int sign; /* 1 if '-', else 0 */
4219 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004220 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004221 int numdigits; /* len == numnondigits + numdigits */
4222 int numnondigits = 0;
4223
4224 switch (type) {
4225 case 'd':
4226 case 'u':
4227 result = val->ob_type->tp_str(val);
4228 break;
4229 case 'o':
4230 result = val->ob_type->tp_as_number->nb_oct(val);
4231 break;
4232 case 'x':
4233 case 'X':
4234 numnondigits = 2;
4235 result = val->ob_type->tp_as_number->nb_hex(val);
4236 break;
4237 default:
4238 assert(!"'type' not in [duoxX]");
4239 }
4240 if (!result)
4241 return NULL;
4242
4243 /* To modify the string in-place, there can only be one reference. */
4244 if (result->ob_refcnt != 1) {
4245 PyErr_BadInternalCall();
4246 return NULL;
4247 }
4248 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004249 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004250 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004251 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4252 return NULL;
4253 }
4254 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004255 if (buf[len-1] == 'L') {
4256 --len;
4257 buf[len] = '\0';
4258 }
4259 sign = buf[0] == '-';
4260 numnondigits += sign;
4261 numdigits = len - numnondigits;
4262 assert(numdigits > 0);
4263
Tim Petersfff53252001-04-12 18:38:48 +00004264 /* Get rid of base marker unless F_ALT */
4265 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004266 /* Need to skip 0x, 0X or 0. */
4267 int skipped = 0;
4268 switch (type) {
4269 case 'o':
4270 assert(buf[sign] == '0');
4271 /* If 0 is only digit, leave it alone. */
4272 if (numdigits > 1) {
4273 skipped = 1;
4274 --numdigits;
4275 }
4276 break;
4277 case 'x':
4278 case 'X':
4279 assert(buf[sign] == '0');
4280 assert(buf[sign + 1] == 'x');
4281 skipped = 2;
4282 numnondigits -= 2;
4283 break;
4284 }
4285 if (skipped) {
4286 buf += skipped;
4287 len -= skipped;
4288 if (sign)
4289 buf[0] = '-';
4290 }
4291 assert(len == numnondigits + numdigits);
4292 assert(numdigits > 0);
4293 }
4294
4295 /* Fill with leading zeroes to meet minimum width. */
4296 if (prec > numdigits) {
4297 PyObject *r1 = PyString_FromStringAndSize(NULL,
4298 numnondigits + prec);
4299 char *b1;
4300 if (!r1) {
4301 Py_DECREF(result);
4302 return NULL;
4303 }
4304 b1 = PyString_AS_STRING(r1);
4305 for (i = 0; i < numnondigits; ++i)
4306 *b1++ = *buf++;
4307 for (i = 0; i < prec - numdigits; i++)
4308 *b1++ = '0';
4309 for (i = 0; i < numdigits; i++)
4310 *b1++ = *buf++;
4311 *b1 = '\0';
4312 Py_DECREF(result);
4313 result = r1;
4314 buf = PyString_AS_STRING(result);
4315 len = numnondigits + prec;
4316 }
4317
4318 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004319 if (type == 'X') {
4320 /* Need to convert all lower case letters to upper case.
4321 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004322 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004323 if (buf[i] >= 'a' && buf[i] <= 'x')
4324 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004325 }
4326 *pbuf = buf;
4327 *plen = len;
4328 return result;
4329}
4330
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004331Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004332formatint(char *buf, size_t buflen, int flags,
4333 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004334{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004335 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004336 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4337 + 1 + 1 = 24 */
4338 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004339 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004340 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004341
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004342 x = PyInt_AsLong(v);
4343 if (x == -1 && PyErr_Occurred()) {
4344 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004345 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004346 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004347 if (x < 0 && type == 'u') {
4348 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004349 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004350 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4351 sign = "-";
4352 else
4353 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004354 if (prec < 0)
4355 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004356
4357 if ((flags & F_ALT) &&
4358 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004359 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004360 * of issues that cause pain:
4361 * - when 0 is being converted, the C standard leaves off
4362 * the '0x' or '0X', which is inconsistent with other
4363 * %#x/%#X conversions and inconsistent with Python's
4364 * hex() function
4365 * - there are platforms that violate the standard and
4366 * convert 0 with the '0x' or '0X'
4367 * (Metrowerks, Compaq Tru64)
4368 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004369 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004370 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004371 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004372 * We can achieve the desired consistency by inserting our
4373 * own '0x' or '0X' prefix, and substituting %x/%X in place
4374 * of %#x/%#X.
4375 *
4376 * Note that this is the same approach as used in
4377 * formatint() in unicodeobject.c
4378 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004379 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4380 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004381 }
4382 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004383 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4384 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004385 prec, type);
4386 }
4387
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004388 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4389 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004390 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004391 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004392 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004393 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004394 return -1;
4395 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004396 if (sign[0])
4397 PyOS_snprintf(buf, buflen, fmt, -x);
4398 else
4399 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004400 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004401}
4402
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004403Py_LOCAL(int)
Fred Drakeba096332000-07-09 07:04:36 +00004404formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004405{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004406 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004407 if (PyString_Check(v)) {
4408 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004409 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004410 }
4411 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004412 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004413 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004414 }
4415 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004416 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004417}
4418
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004419/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4420
4421 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4422 chars are formatted. XXX This is a magic number. Each formatting
4423 routine does bounds checking to ensure no overflow, but a better
4424 solution may be to malloc a buffer of appropriate size for each
4425 format. For now, the current solution is sufficient.
4426*/
4427#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004428
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004429PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004430PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004431{
4432 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004433 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004434 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004435 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004436 PyObject *result, *orig_args;
4437#ifdef Py_USING_UNICODE
4438 PyObject *v, *w;
4439#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004440 PyObject *dict = NULL;
4441 if (format == NULL || !PyString_Check(format) || args == NULL) {
4442 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004443 return NULL;
4444 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004445 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004446 fmt = PyString_AS_STRING(format);
4447 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004448 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004449 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004450 if (result == NULL)
4451 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004452 res = PyString_AsString(result);
4453 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004454 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004455 argidx = 0;
4456 }
4457 else {
4458 arglen = -1;
4459 argidx = -2;
4460 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004461 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4462 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004463 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004464 while (--fmtcnt >= 0) {
4465 if (*fmt != '%') {
4466 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004467 rescnt = fmtcnt + 100;
4468 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004469 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004470 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004471 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004472 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004473 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004474 }
4475 *res++ = *fmt++;
4476 }
4477 else {
4478 /* Got a format specifier */
4479 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004480 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004481 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004482 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004483 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004484 PyObject *v = NULL;
4485 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004486 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004487 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004488 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004489 char formatbuf[FORMATBUFLEN];
4490 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004491#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004492 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004493 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004494#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004495
Guido van Rossumda9c2711996-12-05 21:58:58 +00004496 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004497 if (*fmt == '(') {
4498 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004499 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004500 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004501 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004502
4503 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004504 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004505 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004506 goto error;
4507 }
4508 ++fmt;
4509 --fmtcnt;
4510 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004511 /* Skip over balanced parentheses */
4512 while (pcount > 0 && --fmtcnt >= 0) {
4513 if (*fmt == ')')
4514 --pcount;
4515 else if (*fmt == '(')
4516 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004517 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004518 }
4519 keylen = fmt - keystart - 1;
4520 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004521 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004522 "incomplete format key");
4523 goto error;
4524 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004525 key = PyString_FromStringAndSize(keystart,
4526 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004527 if (key == NULL)
4528 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004529 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004530 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004531 args_owned = 0;
4532 }
4533 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004534 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004535 if (args == NULL) {
4536 goto error;
4537 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004538 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004539 arglen = -1;
4540 argidx = -2;
4541 }
Guido van Rossume5372401993-03-16 12:15:04 +00004542 while (--fmtcnt >= 0) {
4543 switch (c = *fmt++) {
4544 case '-': flags |= F_LJUST; continue;
4545 case '+': flags |= F_SIGN; continue;
4546 case ' ': flags |= F_BLANK; continue;
4547 case '#': flags |= F_ALT; continue;
4548 case '0': flags |= F_ZERO; continue;
4549 }
4550 break;
4551 }
4552 if (c == '*') {
4553 v = getnextarg(args, arglen, &argidx);
4554 if (v == NULL)
4555 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004556 if (!PyInt_Check(v)) {
4557 PyErr_SetString(PyExc_TypeError,
4558 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004559 goto error;
4560 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004561 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004562 if (width < 0) {
4563 flags |= F_LJUST;
4564 width = -width;
4565 }
Guido van Rossume5372401993-03-16 12:15:04 +00004566 if (--fmtcnt >= 0)
4567 c = *fmt++;
4568 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004569 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004570 width = c - '0';
4571 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004572 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004573 if (!isdigit(c))
4574 break;
4575 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004576 PyErr_SetString(
4577 PyExc_ValueError,
4578 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004579 goto error;
4580 }
4581 width = width*10 + (c - '0');
4582 }
4583 }
4584 if (c == '.') {
4585 prec = 0;
4586 if (--fmtcnt >= 0)
4587 c = *fmt++;
4588 if (c == '*') {
4589 v = getnextarg(args, arglen, &argidx);
4590 if (v == NULL)
4591 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004592 if (!PyInt_Check(v)) {
4593 PyErr_SetString(
4594 PyExc_TypeError,
4595 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004596 goto error;
4597 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004598 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004599 if (prec < 0)
4600 prec = 0;
4601 if (--fmtcnt >= 0)
4602 c = *fmt++;
4603 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004604 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004605 prec = c - '0';
4606 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004607 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004608 if (!isdigit(c))
4609 break;
4610 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004611 PyErr_SetString(
4612 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004613 "prec too big");
4614 goto error;
4615 }
4616 prec = prec*10 + (c - '0');
4617 }
4618 }
4619 } /* prec */
4620 if (fmtcnt >= 0) {
4621 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004622 if (--fmtcnt >= 0)
4623 c = *fmt++;
4624 }
4625 }
4626 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004627 PyErr_SetString(PyExc_ValueError,
4628 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004629 goto error;
4630 }
4631 if (c != '%') {
4632 v = getnextarg(args, arglen, &argidx);
4633 if (v == NULL)
4634 goto error;
4635 }
4636 sign = 0;
4637 fill = ' ';
4638 switch (c) {
4639 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004640 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004641 len = 1;
4642 break;
4643 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004644#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004645 if (PyUnicode_Check(v)) {
4646 fmt = fmt_start;
4647 argidx = argidx_start;
4648 goto unicode;
4649 }
Georg Brandld45014b2005-10-01 17:06:00 +00004650#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004651 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004652#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004653 if (temp != NULL && PyUnicode_Check(temp)) {
4654 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004655 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004656 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004657 goto unicode;
4658 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004659#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004660 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004661 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004662 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004663 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004664 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004665 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004666 if (!PyString_Check(temp)) {
4667 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004668 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004669 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004670 goto error;
4671 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004672 pbuf = PyString_AS_STRING(temp);
4673 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004674 if (prec >= 0 && len > prec)
4675 len = prec;
4676 break;
4677 case 'i':
4678 case 'd':
4679 case 'u':
4680 case 'o':
4681 case 'x':
4682 case 'X':
4683 if (c == 'i')
4684 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004685 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004686 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004687 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004688 prec, c, &pbuf, &ilen);
4689 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004690 if (!temp)
4691 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004692 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004693 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004694 else {
4695 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004696 len = formatint(pbuf,
4697 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004698 flags, prec, c, v);
4699 if (len < 0)
4700 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004701 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004702 }
4703 if (flags & F_ZERO)
4704 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004705 break;
4706 case 'e':
4707 case 'E':
4708 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004709 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004710 case 'g':
4711 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004712 if (c == 'F')
4713 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004714 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004715 len = formatfloat(pbuf, sizeof(formatbuf),
4716 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004717 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004718 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004719 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004720 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004721 fill = '0';
4722 break;
4723 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004724#ifdef Py_USING_UNICODE
4725 if (PyUnicode_Check(v)) {
4726 fmt = fmt_start;
4727 argidx = argidx_start;
4728 goto unicode;
4729 }
4730#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004731 pbuf = formatbuf;
4732 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004733 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004734 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004735 break;
4736 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004737 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004738 "unsupported format character '%c' (0x%x) "
4739 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004740 c, c,
4741 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004742 goto error;
4743 }
4744 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004745 if (*pbuf == '-' || *pbuf == '+') {
4746 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004747 len--;
4748 }
4749 else if (flags & F_SIGN)
4750 sign = '+';
4751 else if (flags & F_BLANK)
4752 sign = ' ';
4753 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004754 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004755 }
4756 if (width < len)
4757 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004758 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004759 reslen -= rescnt;
4760 rescnt = width + fmtcnt + 100;
4761 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004762 if (reslen < 0) {
4763 Py_DECREF(result);
4764 return PyErr_NoMemory();
4765 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004766 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004767 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004768 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004769 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004770 }
4771 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004772 if (fill != ' ')
4773 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004774 rescnt--;
4775 if (width > len)
4776 width--;
4777 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004778 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4779 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004780 assert(pbuf[1] == c);
4781 if (fill != ' ') {
4782 *res++ = *pbuf++;
4783 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004784 }
Tim Petersfff53252001-04-12 18:38:48 +00004785 rescnt -= 2;
4786 width -= 2;
4787 if (width < 0)
4788 width = 0;
4789 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004790 }
4791 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004792 do {
4793 --rescnt;
4794 *res++ = fill;
4795 } while (--width > len);
4796 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004797 if (fill == ' ') {
4798 if (sign)
4799 *res++ = sign;
4800 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004801 (c == 'x' || c == 'X')) {
4802 assert(pbuf[0] == '0');
4803 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004804 *res++ = *pbuf++;
4805 *res++ = *pbuf++;
4806 }
4807 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004808 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004809 res += len;
4810 rescnt -= len;
4811 while (--width >= len) {
4812 --rescnt;
4813 *res++ = ' ';
4814 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004815 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004816 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004817 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004818 goto error;
4819 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004820 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004821 } /* '%' */
4822 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004823 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004824 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004825 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004826 goto error;
4827 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004828 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004829 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004830 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004831 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004832 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004833
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004834#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004835 unicode:
4836 if (args_owned) {
4837 Py_DECREF(args);
4838 args_owned = 0;
4839 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004840 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004841 if (PyTuple_Check(orig_args) && argidx > 0) {
4842 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004843 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004844 v = PyTuple_New(n);
4845 if (v == NULL)
4846 goto error;
4847 while (--n >= 0) {
4848 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4849 Py_INCREF(w);
4850 PyTuple_SET_ITEM(v, n, w);
4851 }
4852 args = v;
4853 } else {
4854 Py_INCREF(orig_args);
4855 args = orig_args;
4856 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004857 args_owned = 1;
4858 /* Take what we have of the result and let the Unicode formatting
4859 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004860 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004861 if (_PyString_Resize(&result, rescnt))
4862 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004863 fmtcnt = PyString_GET_SIZE(format) - \
4864 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004865 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4866 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004867 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004868 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004869 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004870 if (v == NULL)
4871 goto error;
4872 /* Paste what we have (result) to what the Unicode formatting
4873 function returned (v) and return the result (or error) */
4874 w = PyUnicode_Concat(result, v);
4875 Py_DECREF(result);
4876 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004877 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004878 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004879#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004880
Guido van Rossume5372401993-03-16 12:15:04 +00004881 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004882 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004883 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004884 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004885 }
Guido van Rossume5372401993-03-16 12:15:04 +00004886 return NULL;
4887}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004888
Guido van Rossum2a61e741997-01-18 07:55:05 +00004889void
Fred Drakeba096332000-07-09 07:04:36 +00004890PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004891{
4892 register PyStringObject *s = (PyStringObject *)(*p);
4893 PyObject *t;
4894 if (s == NULL || !PyString_Check(s))
4895 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004896 /* If it's a string subclass, we don't really know what putting
4897 it in the interned dict might do. */
4898 if (!PyString_CheckExact(s))
4899 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004900 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004901 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004902 if (interned == NULL) {
4903 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004904 if (interned == NULL) {
4905 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004906 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004907 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004908 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004909 t = PyDict_GetItem(interned, (PyObject *)s);
4910 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004911 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004912 Py_DECREF(*p);
4913 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004914 return;
4915 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004916
Armin Rigo79f7ad22004-08-07 19:27:39 +00004917 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004918 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004919 return;
4920 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004921 /* The two references in interned are not counted by refcnt.
4922 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004923 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004924 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004925}
4926
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004927void
4928PyString_InternImmortal(PyObject **p)
4929{
4930 PyString_InternInPlace(p);
4931 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4932 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4933 Py_INCREF(*p);
4934 }
4935}
4936
Guido van Rossum2a61e741997-01-18 07:55:05 +00004937
4938PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004939PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004940{
4941 PyObject *s = PyString_FromString(cp);
4942 if (s == NULL)
4943 return NULL;
4944 PyString_InternInPlace(&s);
4945 return s;
4946}
4947
Guido van Rossum8cf04761997-08-02 02:57:45 +00004948void
Fred Drakeba096332000-07-09 07:04:36 +00004949PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004950{
4951 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004952 for (i = 0; i < UCHAR_MAX + 1; i++) {
4953 Py_XDECREF(characters[i]);
4954 characters[i] = NULL;
4955 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004956 Py_XDECREF(nullstring);
4957 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004958}
Barry Warsawa903ad982001-02-23 16:40:48 +00004959
Barry Warsawa903ad982001-02-23 16:40:48 +00004960void _Py_ReleaseInternedStrings(void)
4961{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004962 PyObject *keys;
4963 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004964 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004965
4966 if (interned == NULL || !PyDict_Check(interned))
4967 return;
4968 keys = PyDict_Keys(interned);
4969 if (keys == NULL || !PyList_Check(keys)) {
4970 PyErr_Clear();
4971 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004972 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004973
4974 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4975 detector, interned strings are not forcibly deallocated; rather, we
4976 give them their stolen references back, and then clear and DECREF
4977 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004978
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004979 fprintf(stderr, "releasing interned strings\n");
4980 n = PyList_GET_SIZE(keys);
4981 for (i = 0; i < n; i++) {
4982 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4983 switch (s->ob_sstate) {
4984 case SSTATE_NOT_INTERNED:
4985 /* XXX Shouldn't happen */
4986 break;
4987 case SSTATE_INTERNED_IMMORTAL:
4988 s->ob_refcnt += 1;
4989 break;
4990 case SSTATE_INTERNED_MORTAL:
4991 s->ob_refcnt += 2;
4992 break;
4993 default:
4994 Py_FatalError("Inconsistent interned string state.");
4995 }
4996 s->ob_sstate = SSTATE_NOT_INTERNED;
4997 }
4998 Py_DECREF(keys);
4999 PyDict_Clear(interned);
5000 Py_DECREF(interned);
5001 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005002}