blob: 2dfac03a906bff4da5765417c3deee6068560fdb [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Fredrik Lundhaf722372006-05-25 17:55:31 +00008#undef USE_INLINE /* XXX - set via configure? */
9
10#if defined(_MSC_VER) /* this is taken from _sre.c */
11#pragma warning(disable: 4710)
12/* fastest possible local call under MSVC */
13#define LOCAL(type) static __inline type __fastcall
14#elif defined(USE_INLINE)
15#define LOCAL(type) static inline type
16#else
17#define LOCAL(type) static type
18#endif
19
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020#ifdef COUNT_ALLOCS
21int null_strings, one_strings;
22#endif
23
Guido van Rossumc0b618a1997-05-02 03:12:38 +000024static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000025static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
35static PyObject *interned;
36
37
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000065PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000066{
Tim Peters9e897f42001-05-09 07:37:07 +000067 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000068 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 Py_INCREF(op);
74 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000085
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000086 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000087 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000088 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000090 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000092 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (str != NULL)
94 memcpy(op->ob_sval, str, size);
95 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000096 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000104 PyObject *t = (PyObject *)op;
105 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000106 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111}
112
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000114PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115{
Tim Peters62de65b2001-12-06 20:29:32 +0000116 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000117 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000118
119 assert(str != NULL);
120 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000121 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000122 PyErr_SetString(PyExc_OverflowError,
123 "string is too long for a Python string");
124 return NULL;
125 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 if (size == 0 && (op = nullstring) != NULL) {
127#ifdef COUNT_ALLOCS
128 null_strings++;
129#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 Py_INCREF(op);
131 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 }
133 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
134#ifdef COUNT_ALLOCS
135 one_strings++;
136#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000137 Py_INCREF(op);
138 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000140
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000141 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000142 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000143 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000147 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000148 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000149 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000157 PyObject *t = (PyObject *)op;
158 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000159 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000161 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000162 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000164}
165
Barry Warsawdadace02001-08-24 18:32:06 +0000166PyObject *
167PyString_FromFormatV(const char *format, va_list vargs)
168{
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000170 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000171 const char* f;
172 char *s;
173 PyObject* string;
174
Tim Petersc15c4f12001-10-02 21:32:07 +0000175#ifdef VA_LIST_IS_ARRAY
176 memcpy(count, vargs, sizeof(va_list));
177#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000178#ifdef __va_copy
179 __va_copy(count, vargs);
180#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000181 count = vargs;
182#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000183#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
188 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
189 ;
190
Tim Peters8931ff12006-05-13 23:28:20 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000196 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000197
Barry Warsawdadace02001-08-24 18:32:06 +0000198 switch (*f) {
199 case 'c':
200 (void)va_arg(count, int);
201 /* fall through... */
202 case '%':
203 n++;
204 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000205 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000206 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 /* 20 bytes is enough to hold a 64-bit
208 integer. Decimal takes the most space.
209 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000210 n += 20;
211 break;
212 case 's':
213 s = va_arg(count, char*);
214 n += strlen(s);
215 break;
216 case 'p':
217 (void) va_arg(count, int);
218 /* maximum 64-bit pointer representation:
219 * 0xffffffffffffffff
220 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000222 */
223 n += 19;
224 break;
225 default:
226 /* if we stumble upon an unknown
227 formatting code, copy the rest of
228 the format string to the output
229 string. (we cannot just skip the
230 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 n += strlen(p);
233 goto expand;
234 }
235 } else
236 n++;
237 }
238 expand:
239 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000240 /* Since we've analyzed how much space we need for the worst case,
241 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000242 string = PyString_FromStringAndSize(NULL, n);
243 if (!string)
244 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000245
Barry Warsawdadace02001-08-24 18:32:06 +0000246 s = PyString_AsString(string);
247
248 for (f = format; *f; f++) {
249 if (*f == '%') {
250 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000251 Py_ssize_t i;
252 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000253 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000254 /* parse the width.precision part (we're only
255 interested in the precision value, if any) */
256 n = 0;
257 while (isdigit(Py_CHARMASK(*f)))
258 n = (n*10) + *f++ - '0';
259 if (*f == '.') {
260 f++;
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 }
265 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
266 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000267 /* handle the long flag, but only for %ld and %lu.
268 others can be added when necessary. */
269 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000270 longflag = 1;
271 ++f;
272 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000273 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000274 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000275 size_tflag = 1;
276 ++f;
277 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000278
Barry Warsawdadace02001-08-24 18:32:06 +0000279 switch (*f) {
280 case 'c':
281 *s++ = va_arg(vargs, int);
282 break;
283 case 'd':
284 if (longflag)
285 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000286 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
288 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
290 sprintf(s, "%d", va_arg(vargs, int));
291 s += strlen(s);
292 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000293 case 'u':
294 if (longflag)
295 sprintf(s, "%lu",
296 va_arg(vargs, unsigned long));
297 else if (size_tflag)
298 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
299 va_arg(vargs, size_t));
300 else
301 sprintf(s, "%u",
302 va_arg(vargs, unsigned int));
303 s += strlen(s);
304 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000305 case 'i':
306 sprintf(s, "%i", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 'x':
310 sprintf(s, "%x", va_arg(vargs, int));
311 s += strlen(s);
312 break;
313 case 's':
314 p = va_arg(vargs, char*);
315 i = strlen(p);
316 if (n > 0 && i > n)
317 i = n;
318 memcpy(s, p, i);
319 s += i;
320 break;
321 case 'p':
322 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000323 /* %p is ill-defined: ensure leading 0x. */
324 if (s[1] == 'X')
325 s[1] = 'x';
326 else if (s[1] != 'x') {
327 memmove(s+2, s, strlen(s)+1);
328 s[0] = '0';
329 s[1] = 'x';
330 }
Barry Warsawdadace02001-08-24 18:32:06 +0000331 s += strlen(s);
332 break;
333 case '%':
334 *s++ = '%';
335 break;
336 default:
337 strcpy(s, p);
338 s += strlen(s);
339 goto end;
340 }
341 } else
342 *s++ = *f;
343 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000344
Barry Warsawdadace02001-08-24 18:32:06 +0000345 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000347 return string;
348}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000349
Barry Warsawdadace02001-08-24 18:32:06 +0000350PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000351PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000352{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000353 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000354 va_list vargs;
355
356#ifdef HAVE_STDARG_PROTOTYPES
357 va_start(vargs, format);
358#else
359 va_start(vargs);
360#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000361 ret = PyString_FromFormatV(format, vargs);
362 va_end(vargs);
363 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000364}
365
366
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000368 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 const char *encoding,
370 const char *errors)
371{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372 PyObject *v, *str;
373
374 str = PyString_FromStringAndSize(s, size);
375 if (str == NULL)
376 return NULL;
377 v = PyString_AsDecodedString(str, encoding, errors);
378 Py_DECREF(str);
379 return v;
380}
381
382PyObject *PyString_AsDecodedObject(PyObject *str,
383 const char *encoding,
384 const char *errors)
385{
386 PyObject *v;
387
388 if (!PyString_Check(str)) {
389 PyErr_BadArgument();
390 goto onError;
391 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393 if (encoding == NULL) {
394#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#else
397 PyErr_SetString(PyExc_ValueError, "no encoding specified");
398 goto onError;
399#endif
400 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000401
402 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 v = PyCodec_Decode(str, encoding, errors);
404 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000405 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000406
407 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000410 return NULL;
411}
412
413PyObject *PyString_AsDecodedString(PyObject *str,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v;
418
419 v = PyString_AsDecodedObject(str, encoding, errors);
420 if (v == NULL)
421 goto onError;
422
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000423#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000424 /* Convert Unicode to a string using the default encoding */
425 if (PyUnicode_Check(v)) {
426 PyObject *temp = v;
427 v = PyUnicode_AsEncodedString(v, NULL, NULL);
428 Py_DECREF(temp);
429 if (v == NULL)
430 goto onError;
431 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000433 if (!PyString_Check(v)) {
434 PyErr_Format(PyExc_TypeError,
435 "decoder did not return a string object (type=%.400s)",
436 v->ob_type->tp_name);
437 Py_DECREF(v);
438 goto onError;
439 }
440
441 return v;
442
443 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 return NULL;
445}
446
447PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000448 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000453
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000454 str = PyString_FromStringAndSize(s, size);
455 if (str == NULL)
456 return NULL;
457 v = PyString_AsEncodedString(str, encoding, errors);
458 Py_DECREF(str);
459 return v;
460}
461
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 const char *encoding,
464 const char *errors)
465{
466 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000467
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(str)) {
469 PyErr_BadArgument();
470 goto onError;
471 }
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473 if (encoding == NULL) {
474#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#else
477 PyErr_SetString(PyExc_ValueError, "no encoding specified");
478 goto onError;
479#endif
480 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000481
482 /* Encode via the codec registry */
483 v = PyCodec_Encode(str, encoding, errors);
484 if (v == NULL)
485 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
487 return v;
488
489 onError:
490 return NULL;
491}
492
493PyObject *PyString_AsEncodedString(PyObject *str,
494 const char *encoding,
495 const char *errors)
496{
497 PyObject *v;
498
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000499 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000500 if (v == NULL)
501 goto onError;
502
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000503#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000504 /* Convert Unicode to a string using the default encoding */
505 if (PyUnicode_Check(v)) {
506 PyObject *temp = v;
507 v = PyUnicode_AsEncodedString(v, NULL, NULL);
508 Py_DECREF(temp);
509 if (v == NULL)
510 goto onError;
511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000512#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 if (!PyString_Check(v)) {
514 PyErr_Format(PyExc_TypeError,
515 "encoder did not return a string object (type=%.400s)",
516 v->ob_type->tp_name);
517 Py_DECREF(v);
518 goto onError;
519 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000520
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000521 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000522
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000523 onError:
524 return NULL;
525}
526
Guido van Rossum234f9421993-06-17 12:35:49 +0000527static void
Fred Drakeba096332000-07-09 07:04:36 +0000528string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000529{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000530 switch (PyString_CHECK_INTERNED(op)) {
531 case SSTATE_NOT_INTERNED:
532 break;
533
534 case SSTATE_INTERNED_MORTAL:
535 /* revive dead object temporarily for DelItem */
536 op->ob_refcnt = 3;
537 if (PyDict_DelItem(interned, op) != 0)
538 Py_FatalError(
539 "deletion of interned string failed");
540 break;
541
542 case SSTATE_INTERNED_IMMORTAL:
543 Py_FatalError("Immortal interned string died.");
544
545 default:
546 Py_FatalError("Inconsistent interned string state.");
547 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000548 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000549}
550
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000551/* Unescape a backslash-escaped string. If unicode is non-zero,
552 the string is a u-literal. If recode_encoding is non-zero,
553 the string is UTF-8 encoded and should be re-encoded in the
554 specified encoding. */
555
556PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000557 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000559 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000560 const char *recode_encoding)
561{
562 int c;
563 char *p, *buf;
564 const char *end;
565 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000566 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000567 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568 if (v == NULL)
569 return NULL;
570 p = buf = PyString_AsString(v);
571 end = s + len;
572 while (s < end) {
573 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000574 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575#ifdef Py_USING_UNICODE
576 if (recode_encoding && (*s & 0x80)) {
577 PyObject *u, *w;
578 char *r;
579 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000580 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 t = s;
582 /* Decode non-ASCII bytes as UTF-8. */
583 while (t < end && (*t & 0x80)) t++;
584 u = PyUnicode_DecodeUTF8(s, t - s, errors);
585 if(!u) goto failed;
586
587 /* Recode them in target encoding. */
588 w = PyUnicode_AsEncodedString(
589 u, recode_encoding, errors);
590 Py_DECREF(u);
591 if (!w) goto failed;
592
593 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000594 assert(PyString_Check(w));
595 r = PyString_AS_STRING(w);
596 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000597 memcpy(p, r, rn);
598 p += rn;
599 Py_DECREF(w);
600 s = t;
601 } else {
602 *p++ = *s++;
603 }
604#else
605 *p++ = *s++;
606#endif
607 continue;
608 }
609 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000610 if (s==end) {
611 PyErr_SetString(PyExc_ValueError,
612 "Trailing \\ in string");
613 goto failed;
614 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000615 switch (*s++) {
616 /* XXX This assumes ASCII! */
617 case '\n': break;
618 case '\\': *p++ = '\\'; break;
619 case '\'': *p++ = '\''; break;
620 case '\"': *p++ = '\"'; break;
621 case 'b': *p++ = '\b'; break;
622 case 'f': *p++ = '\014'; break; /* FF */
623 case 't': *p++ = '\t'; break;
624 case 'n': *p++ = '\n'; break;
625 case 'r': *p++ = '\r'; break;
626 case 'v': *p++ = '\013'; break; /* VT */
627 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
628 case '0': case '1': case '2': case '3':
629 case '4': case '5': case '6': case '7':
630 c = s[-1] - '0';
631 if ('0' <= *s && *s <= '7') {
632 c = (c<<3) + *s++ - '0';
633 if ('0' <= *s && *s <= '7')
634 c = (c<<3) + *s++ - '0';
635 }
636 *p++ = c;
637 break;
638 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000639 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 && isxdigit(Py_CHARMASK(s[1]))) {
641 unsigned int x = 0;
642 c = Py_CHARMASK(*s);
643 s++;
644 if (isdigit(c))
645 x = c - '0';
646 else if (islower(c))
647 x = 10 + c - 'a';
648 else
649 x = 10 + c - 'A';
650 x = x << 4;
651 c = Py_CHARMASK(*s);
652 s++;
653 if (isdigit(c))
654 x += c - '0';
655 else if (islower(c))
656 x += 10 + c - 'a';
657 else
658 x += 10 + c - 'A';
659 *p++ = x;
660 break;
661 }
662 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000663 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000664 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667 if (strcmp(errors, "replace") == 0) {
668 *p++ = '?';
669 } else if (strcmp(errors, "ignore") == 0)
670 /* do nothing */;
671 else {
672 PyErr_Format(PyExc_ValueError,
673 "decoding error; "
674 "unknown error handling code: %.400s",
675 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#ifndef Py_USING_UNICODE
679 case 'u':
680 case 'U':
681 case 'N':
682 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000683 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 "Unicode escapes not legal "
685 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000686 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 }
688#endif
689 default:
690 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000691 s--;
692 goto non_esc; /* an arbitry number of unescaped
693 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 }
695 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000696 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000698 return v;
699 failed:
700 Py_DECREF(v);
701 return NULL;
702}
703
Martin v. Löwis18e16552006-02-15 17:27:45 +0000704static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705string_getsize(register PyObject *op)
706{
707 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000708 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709 if (PyString_AsStringAndSize(op, &s, &len))
710 return -1;
711 return len;
712}
713
714static /*const*/ char *
715string_getbuffer(register PyObject *op)
716{
717 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000718 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
720 return NULL;
721 return s;
722}
723
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000725PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 if (!PyString_Check(op))
728 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730}
731
732/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000733PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000734{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735 if (!PyString_Check(op))
736 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738}
739
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740int
741PyString_AsStringAndSize(register PyObject *obj,
742 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000743 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000744{
745 if (s == NULL) {
746 PyErr_BadInternalCall();
747 return -1;
748 }
749
750 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000752 if (PyUnicode_Check(obj)) {
753 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
754 if (obj == NULL)
755 return -1;
756 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000757 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000758#endif
759 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_Format(PyExc_TypeError,
761 "expected string or Unicode object, "
762 "%.200s found", obj->ob_type->tp_name);
763 return -1;
764 }
765 }
766
767 *s = PyString_AS_STRING(obj);
768 if (len != NULL)
769 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000770 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000771 PyErr_SetString(PyExc_TypeError,
772 "expected string without null bytes");
773 return -1;
774 }
775 return 0;
776}
777
Fredrik Lundhaf722372006-05-25 17:55:31 +0000778/* -------------------------------------------------------------------- */
779/* Helpers */
780
781#define USE_FAST /* experimental fast search implementation */
782
783/* XXX - this code is copied from unicodeobject.c. we really should
784 refactor the core implementations (see _sre.c for how this can be
785 done), but that'll have to wait -- fredrik */
786
787/* fast search/count implementation, based on a mix between boyer-
788 moore and horspool, with a few more bells and whistles on the top.
789 for some more background, see: http://effbot.org/stringlib */
790
791/* note: fastsearch may access s[n], which isn't a problem when using
792 Python's ordinary string types, but may cause problems if you're
793 using this code in other contexts. also, the count mode returns -1
Andrew M. Kuchlingf344c942006-05-25 18:11:16 +0000794 if there cannot possibly be a match in the target string, and 0 if
Fredrik Lundhaf722372006-05-25 17:55:31 +0000795 it has actually checked for matches, but didn't find any. callers
796 beware! */
797
798#define FAST_COUNT 0
799#define FAST_SEARCH 1
800
801LOCAL(Py_ssize_t)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +0000802fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode)
Fredrik Lundhaf722372006-05-25 17:55:31 +0000803{
804 long mask;
805 int skip, count = 0;
806 Py_ssize_t i, j, mlast, w;
807
808 w = n - m;
809
810 if (w < 0)
811 return -1;
812
813 /* look for special cases */
814 if (m <= 1) {
815 if (m <= 0)
816 return -1;
817 /* use special case for 1-character strings */
818 if (mode == FAST_COUNT) {
819 for (i = 0; i < n; i++)
820 if (s[i] == p[0])
821 count++;
822 return count;
823 } else {
824 for (i = 0; i < n; i++)
825 if (s[i] == p[0])
826 return i;
827 }
828 return -1;
829 }
830
831 mlast = m - 1;
832
833 /* create compressed boyer-moore delta 1 table */
834 skip = mlast - 1;
835 /* process pattern[:-1] */
836 for (mask = i = 0; i < mlast; i++) {
837 mask |= (1 << (p[i] & 0x1F));
838 if (p[i] == p[mlast])
839 skip = mlast - i - 1;
840 }
841 /* process pattern[-1] outside the loop */
842 mask |= (1 << (p[mlast] & 0x1F));
843
844 for (i = 0; i <= w; i++) {
845 /* note: using mlast in the skip path slows things down on x86 */
846 if (s[i+m-1] == p[m-1]) {
847 /* candidate match */
848 for (j = 0; j < mlast; j++)
849 if (s[i+j] != p[j])
850 break;
851 if (j == mlast) {
852 /* got a match! */
853 if (mode != FAST_COUNT)
854 return i;
855 count++;
856 i = i + mlast;
857 continue;
858 }
859 /* miss: check if next character is part of pattern */
860 if (!(mask & (1 << (s[i+m] & 0x1F))))
861 i = i + m;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +0000862 else
Fredrik Lundhaf722372006-05-25 17:55:31 +0000863 i = i + skip;
Fredrik Lundhaf722372006-05-25 17:55:31 +0000864 } else {
865 /* skip: check if next character is part of pattern */
866 if (!(mask & (1 << (s[i+m] & 0x1F))))
867 i = i + m;
868 }
869 }
870
871 if (mode != FAST_COUNT)
872 return -1;
873 return count;
874}
875
876/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877/* Methods */
878
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000879static int
Fred Drakeba096332000-07-09 07:04:36 +0000880string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000882 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000883 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000884 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000885
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000886 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000887 if (! PyString_CheckExact(op)) {
888 int ret;
889 /* A str subclass may have its own __str__ method. */
890 op = (PyStringObject *) PyObject_Str((PyObject *)op);
891 if (op == NULL)
892 return -1;
893 ret = string_print(op, fp, flags);
894 Py_DECREF(op);
895 return ret;
896 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000897 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000898#ifdef __VMS
899 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
900#else
901 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
902#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000903 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000905
Thomas Wouters7e474022000-07-16 12:04:32 +0000906 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000907 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000908 if (memchr(op->ob_sval, '\'', op->ob_size) &&
909 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000910 quote = '"';
911
912 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000913 for (i = 0; i < op->ob_size; i++) {
914 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000915 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000916 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000917 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000918 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000919 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000920 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000921 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000922 fprintf(fp, "\\r");
923 else if (c < ' ' || c >= 0x7f)
924 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000925 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000926 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000928 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000929 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930}
931
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000932PyObject *
933PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000935 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000936 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000937 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000938 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000939 PyErr_SetString(PyExc_OverflowError,
940 "string is too large to make repr");
941 }
942 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000944 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 }
946 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000947 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 register char c;
949 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000950 int quote;
951
Thomas Wouters7e474022000-07-16 12:04:32 +0000952 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000953 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000954 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000955 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000956 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000957 quote = '"';
958
Tim Peters9161c8b2001-12-03 01:55:38 +0000959 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000960 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000961 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000962 /* There's at least enough room for a hex escape
963 and a closing quote. */
964 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000966 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000967 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000968 else if (c == '\t')
969 *p++ = '\\', *p++ = 't';
970 else if (c == '\n')
971 *p++ = '\\', *p++ = 'n';
972 else if (c == '\r')
973 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000974 else if (c < ' ' || c >= 0x7f) {
975 /* For performance, we don't want to call
976 PyOS_snprintf here (extra layers of
977 function call). */
978 sprintf(p, "\\x%02x", c & 0xff);
979 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000980 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000981 else
982 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000984 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000985 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000988 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000989 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000991}
992
Guido van Rossum189f1df2001-05-01 16:51:53 +0000993static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000994string_repr(PyObject *op)
995{
996 return PyString_Repr(op, 1);
997}
998
999static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +00001000string_str(PyObject *s)
1001{
Tim Petersc9933152001-10-16 20:18:24 +00001002 assert(PyString_Check(s));
1003 if (PyString_CheckExact(s)) {
1004 Py_INCREF(s);
1005 return s;
1006 }
1007 else {
1008 /* Subtype -- return genuine string with the same value. */
1009 PyStringObject *t = (PyStringObject *) s;
1010 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
1011 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001012}
1013
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001015string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001016{
1017 return a->ob_size;
1018}
1019
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001021string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001022{
Andrew Dalke598710c2006-05-25 18:18:39 +00001023 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024 register PyStringObject *op;
1025 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001026#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001027 if (PyUnicode_Check(bb))
1028 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001029#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001030 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +00001031 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +00001032 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001033 return NULL;
1034 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +00001037 if ((a->ob_size == 0 || b->ob_size == 0) &&
1038 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1039 if (a->ob_size == 0) {
1040 Py_INCREF(bb);
1041 return bb;
1042 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043 Py_INCREF(a);
1044 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045 }
1046 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +00001047 if (size < 0) {
1048 PyErr_SetString(PyExc_OverflowError,
1049 "strings are too large to concat");
1050 return NULL;
1051 }
1052
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001053 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001054 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001055 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001056 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001057 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001058 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001059 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001060 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1061 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001062 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001063 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001064#undef b
1065}
1066
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001067static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001068string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001070 register Py_ssize_t i;
1071 register Py_ssize_t j;
1072 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001073 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001074 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001075 if (n < 0)
1076 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001077 /* watch out for overflows: the size can overflow int,
1078 * and the # of bytes needed can overflow size_t
1079 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001080 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001081 if (n && size / n != a->ob_size) {
1082 PyErr_SetString(PyExc_OverflowError,
1083 "repeated string is too long");
1084 return NULL;
1085 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001086 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001087 Py_INCREF(a);
1088 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001089 }
Tim Peterse7c05322004-06-27 17:24:49 +00001090 nbytes = (size_t)size;
1091 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001092 PyErr_SetString(PyExc_OverflowError,
1093 "repeated string is too long");
1094 return NULL;
1095 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001096 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001097 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001098 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001100 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001101 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001102 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001103 op->ob_sval[size] = '\0';
1104 if (a->ob_size == 1 && n > 0) {
1105 memset(op->ob_sval, a->ob_sval[0] , n);
1106 return (PyObject *) op;
1107 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001108 i = 0;
1109 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001110 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1111 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001112 }
1113 while (i < size) {
1114 j = (i <= size-i) ? i : size-i;
1115 memcpy(op->ob_sval+i, op->ob_sval, j);
1116 i += j;
1117 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001118 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119}
1120
1121/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1122
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001123static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001124string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001125 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001126 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001127{
1128 if (i < 0)
1129 i = 0;
1130 if (j < 0)
1131 j = 0; /* Avoid signed/unsigned bug in next line */
1132 if (j > a->ob_size)
1133 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001134 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1135 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001136 Py_INCREF(a);
1137 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001138 }
1139 if (j < i)
1140 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001141 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001142}
1143
Guido van Rossum9284a572000-03-07 15:53:43 +00001144static int
Fred Drakeba096332000-07-09 07:04:36 +00001145string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001146{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001147 char *s = PyString_AS_STRING(a);
1148 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001149 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001150#ifdef USE_FAST
1151 Py_ssize_t pos;
1152#else
1153 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001154 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001155 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001156#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001157
1158 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001159#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001160 if (PyUnicode_Check(el))
1161 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001162#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001163 if (!PyString_Check(el)) {
1164 PyErr_SetString(PyExc_TypeError,
1165 "'in <string>' requires string as left operand");
1166 return -1;
1167 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001168 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001169
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001170 if (len_sub == 0)
1171 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001172
1173#ifdef USE_FAST
1174 pos = fastsearch(
1175 s, PyString_GET_SIZE(a),
1176 sub, len_sub, FAST_SEARCH
1177 );
1178 return (pos != -1);
1179#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001180 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001181 substring. When s<last, there is still room for a possible match
1182 and s[0] through s[len_sub-1] will be in bounds.
1183 shortsub is len_sub minus the last character which is checked
1184 separately just before the memcmp(). That check helps prevent
1185 false starts and saves the setup time for memcmp().
1186 */
1187 firstchar = sub[0];
1188 shortsub = len_sub - 1;
1189 lastchar = sub[shortsub];
1190 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1191 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001192 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001193 if (s == NULL)
1194 return 0;
1195 assert(s < last);
1196 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001197 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001198 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001199 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001200#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001201 return 0;
1202}
1203
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001204static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001205string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001206{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001207 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001208 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001209 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001210 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001211 return NULL;
1212 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001213 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001214 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001215 if (v == NULL)
1216 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001217 else {
1218#ifdef COUNT_ALLOCS
1219 one_strings++;
1220#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001221 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001222 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001223 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001224}
1225
Martin v. Löwiscd353062001-05-24 16:56:35 +00001226static PyObject*
1227string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001228{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001229 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001230 Py_ssize_t len_a, len_b;
1231 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001232 PyObject *result;
1233
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001234 /* Make sure both arguments are strings. */
1235 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001236 result = Py_NotImplemented;
1237 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001238 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001239 if (a == b) {
1240 switch (op) {
1241 case Py_EQ:case Py_LE:case Py_GE:
1242 result = Py_True;
1243 goto out;
1244 case Py_NE:case Py_LT:case Py_GT:
1245 result = Py_False;
1246 goto out;
1247 }
1248 }
1249 if (op == Py_EQ) {
1250 /* Supporting Py_NE here as well does not save
1251 much time, since Py_NE is rarely used. */
1252 if (a->ob_size == b->ob_size
1253 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001254 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001255 a->ob_size) == 0)) {
1256 result = Py_True;
1257 } else {
1258 result = Py_False;
1259 }
1260 goto out;
1261 }
1262 len_a = a->ob_size; len_b = b->ob_size;
1263 min_len = (len_a < len_b) ? len_a : len_b;
1264 if (min_len > 0) {
1265 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1266 if (c==0)
1267 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1268 }else
1269 c = 0;
1270 if (c == 0)
1271 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1272 switch (op) {
1273 case Py_LT: c = c < 0; break;
1274 case Py_LE: c = c <= 0; break;
1275 case Py_EQ: assert(0); break; /* unreachable */
1276 case Py_NE: c = c != 0; break;
1277 case Py_GT: c = c > 0; break;
1278 case Py_GE: c = c >= 0; break;
1279 default:
1280 result = Py_NotImplemented;
1281 goto out;
1282 }
1283 result = c ? Py_True : Py_False;
1284 out:
1285 Py_INCREF(result);
1286 return result;
1287}
1288
1289int
1290_PyString_Eq(PyObject *o1, PyObject *o2)
1291{
1292 PyStringObject *a, *b;
1293 a = (PyStringObject*)o1;
1294 b = (PyStringObject*)o2;
1295 return a->ob_size == b->ob_size
1296 && *a->ob_sval == *b->ob_sval
1297 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001298}
1299
Guido van Rossum9bfef441993-03-29 10:43:31 +00001300static long
Fred Drakeba096332000-07-09 07:04:36 +00001301string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001302{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001303 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001304 register unsigned char *p;
1305 register long x;
1306
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001307 if (a->ob_shash != -1)
1308 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001309 len = a->ob_size;
1310 p = (unsigned char *) a->ob_sval;
1311 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001312 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001313 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001314 x ^= a->ob_size;
1315 if (x == -1)
1316 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001317 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001318 return x;
1319}
1320
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001321#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1322
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001323static PyObject*
1324string_subscript(PyStringObject* self, PyObject* item)
1325{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001326 PyNumberMethods *nb = item->ob_type->tp_as_number;
1327 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1328 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001329 if (i == -1 && PyErr_Occurred())
1330 return NULL;
1331 if (i < 0)
1332 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001333 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001334 }
1335 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001336 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001337 char* source_buf;
1338 char* result_buf;
1339 PyObject* result;
1340
Tim Petersae1d0c92006-03-17 03:29:34 +00001341 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001342 PyString_GET_SIZE(self),
1343 &start, &stop, &step, &slicelength) < 0) {
1344 return NULL;
1345 }
1346
1347 if (slicelength <= 0) {
1348 return PyString_FromStringAndSize("", 0);
1349 }
1350 else {
1351 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001352 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001353 if (result_buf == NULL)
1354 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001355
Tim Petersae1d0c92006-03-17 03:29:34 +00001356 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001357 cur += step, i++) {
1358 result_buf[i] = source_buf[cur];
1359 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001360
1361 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001362 slicelength);
1363 PyMem_Free(result_buf);
1364 return result;
1365 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001366 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001367 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001368 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001369 "string indices must be integers");
1370 return NULL;
1371 }
1372}
1373
Martin v. Löwis18e16552006-02-15 17:27:45 +00001374static Py_ssize_t
1375string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001376{
1377 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001378 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001379 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001380 return -1;
1381 }
1382 *ptr = (void *)self->ob_sval;
1383 return self->ob_size;
1384}
1385
Martin v. Löwis18e16552006-02-15 17:27:45 +00001386static Py_ssize_t
1387string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001388{
Guido van Rossum045e6881997-09-08 18:30:11 +00001389 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001390 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001391 return -1;
1392}
1393
Martin v. Löwis18e16552006-02-15 17:27:45 +00001394static Py_ssize_t
1395string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001396{
1397 if ( lenp )
1398 *lenp = self->ob_size;
1399 return 1;
1400}
1401
Martin v. Löwis18e16552006-02-15 17:27:45 +00001402static Py_ssize_t
1403string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001404{
1405 if ( index != 0 ) {
1406 PyErr_SetString(PyExc_SystemError,
1407 "accessing non-existent string segment");
1408 return -1;
1409 }
1410 *ptr = self->ob_sval;
1411 return self->ob_size;
1412}
1413
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001414static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001415 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001416 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001417 (ssizeargfunc)string_repeat, /*sq_repeat*/
1418 (ssizeargfunc)string_item, /*sq_item*/
1419 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001420 0, /*sq_ass_item*/
1421 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001422 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001423};
1424
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001425static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001426 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001427 (binaryfunc)string_subscript,
1428 0,
1429};
1430
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001431static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001432 (readbufferproc)string_buffer_getreadbuf,
1433 (writebufferproc)string_buffer_getwritebuf,
1434 (segcountproc)string_buffer_getsegcount,
1435 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001436};
1437
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438
1439
/* Strip-type selectors; they double as indices into stripformat[] */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* stripformat[i] with its 3-character "|O:" prefix skipped, leaving
   just the method name */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001448
/* Append the substring data[left:right] to `list` as a new string.

   This expands to several statements and relies on the enclosing
   function to provide the variables `str` and `list` and the label
   `onError`, which is jumped to when creating or appending the string
   fails.  Because it is not a single statement, use it only inside a
   braced block. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
1460
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001462split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001464 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001465 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466 PyObject *list = PyList_New(0);
1467
1468 if (list == NULL)
1469 return NULL;
1470
Guido van Rossum4c08d552000-03-10 22:55:18 +00001471 for (i = j = 0; i < len; ) {
1472 while (i < len && isspace(Py_CHARMASK(s[i])))
1473 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001475 while (i < len && !isspace(Py_CHARMASK(s[i])))
1476 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001478 if (maxsplit-- <= 0)
1479 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001480 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 while (i < len && isspace(Py_CHARMASK(s[i])))
1482 i++;
1483 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 }
1485 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001486 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001487 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001488 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001490 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 Py_DECREF(list);
1492 return NULL;
1493}
1494
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001495static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001496split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001497{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001498 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001499 PyObject *str;
1500 PyObject *list = PyList_New(0);
1501
1502 if (list == NULL)
1503 return NULL;
1504
1505 for (i = j = 0; i < len; ) {
1506 if (s[i] == ch) {
1507 if (maxcount-- <= 0)
1508 break;
1509 SPLIT_APPEND(s, j, i);
1510 i = j = i + 1;
1511 } else
1512 i++;
1513 }
1514 if (j <= len) {
1515 SPLIT_APPEND(s, j, len);
1516 }
1517 return list;
1518
1519 onError:
1520 Py_DECREF(list);
1521 return NULL;
1522}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001524PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525"S.split([sep [,maxsplit]]) -> list of strings\n\
1526\n\
1527Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001529splits are done. If sep is not specified or is None, any\n\
1530whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531
1532static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001533string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001535 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1536 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001537 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001538 const char *s = PyString_AS_STRING(self), *sub;
1539 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001540
Martin v. Löwis9c830762006-04-13 08:37:17 +00001541 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001544 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001545 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 if (PyString_Check(subobj)) {
1548 sub = PyString_AS_STRING(subobj);
1549 n = PyString_GET_SIZE(subobj);
1550 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001551#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 else if (PyUnicode_Check(subobj))
1553 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001554#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1556 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001557
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 if (n == 0) {
1559 PyErr_SetString(PyExc_ValueError, "empty separator");
1560 return NULL;
1561 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001562 else if (n == 1)
1563 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564
1565 list = PyList_New(0);
1566 if (list == NULL)
1567 return NULL;
1568
1569 i = j = 0;
1570 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001571 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 if (maxsplit-- <= 0)
1573 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001574 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 if (item == NULL)
1576 goto fail;
1577 err = PyList_Append(list, item);
1578 Py_DECREF(item);
1579 if (err < 0)
1580 goto fail;
1581 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 }
1583 else
1584 i++;
1585 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001586 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 if (item == NULL)
1588 goto fail;
1589 err = PyList_Append(list, item);
1590 Py_DECREF(item);
1591 if (err < 0)
1592 goto fail;
1593
1594 return list;
1595
1596 fail:
1597 Py_DECREF(list);
1598 return NULL;
1599}
1600
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001601PyDoc_STRVAR(partition__doc__,
1602"S.partition(sep) -> (head, sep, tail)\n\
1603\n\
1604Searches for the separator sep in S, and returns the part before it,\n\
1605the separator itself, and the part after it. If the separator is not\n\
1606found, returns S and two empty strings.");
1607
1608static PyObject *
1609string_partition(PyStringObject *self, PyObject *args)
1610{
1611 Py_ssize_t len = PyString_GET_SIZE(self), sep_len, pos;
1612 const char *str = PyString_AS_STRING(self), *sep;
1613 PyObject *sepobj;
1614 PyObject * out;
1615
1616 if (!PyArg_ParseTuple(args, "O:partition", &sepobj))
1617 return NULL;
1618 if (PyString_Check(sepobj)) {
1619 sep = PyString_AS_STRING(sepobj);
1620 sep_len = PyString_GET_SIZE(sepobj);
1621 }
1622#ifdef Py_USING_UNICODE_NOTYET
1623 else if (PyUnicode_Check(sepobj))
1624 return PyUnicode_Partition((PyObject *)self, sepobj);
1625#endif
1626 else if (PyObject_AsCharBuffer(sepobj, &sep, &sep_len))
1627 return NULL;
1628
1629 if (sep_len == 0) {
1630 PyErr_SetString(PyExc_ValueError, "empty separator");
1631 return NULL;
1632 }
1633
1634 out = PyTuple_New(3);
1635 if (!out)
1636 return NULL;
1637
1638 pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH);
1639 if (pos < 0) {
1640 Py_INCREF(self);
1641 PyTuple_SET_ITEM(out, 0, (PyObject*) self);
1642 Py_INCREF(nullstring);
1643 PyTuple_SET_ITEM(out, 1, (PyObject*) nullstring);
1644 Py_INCREF(nullstring);
1645 PyTuple_SET_ITEM(out, 2, (PyObject*) nullstring);
1646 } else {
1647 Py_INCREF(sepobj);
1648 PyTuple_SET_ITEM(out, 0, PyString_FromStringAndSize(str, pos));
1649 PyTuple_SET_ITEM(out, 1, sepobj);
1650 PyTuple_SET_ITEM(out, 2,
1651 PyString_FromStringAndSize(str + sep_len + pos,
1652 len - sep_len - pos)
1653 );
1654 if (PyErr_Occurred()) {
1655 Py_DECREF(out);
1656 return NULL;
1657 }
1658 }
1659
1660 return out;
1661}
1662
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001663static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001664rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001665{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001666 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001667 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001668 PyObject *list = PyList_New(0);
1669
1670 if (list == NULL)
1671 return NULL;
1672
1673 for (i = j = len - 1; i >= 0; ) {
1674 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1675 i--;
1676 j = i;
1677 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1678 i--;
1679 if (j > i) {
1680 if (maxsplit-- <= 0)
1681 break;
Fredrik Lundh554da412006-05-25 19:19:05 +00001682 SPLIT_APPEND(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001683 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1684 i--;
1685 j = i;
1686 }
1687 }
1688 if (j >= 0) {
Fredrik Lundh554da412006-05-25 19:19:05 +00001689 SPLIT_APPEND(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001690 }
Fredrik Lundh554da412006-05-25 19:19:05 +00001691 if (PyList_Reverse(list) < 0)
1692 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001694 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001695 Py_DECREF(list);
1696 return NULL;
1697}
1698
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001699static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001700rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001701{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001702 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001703 PyObject *str;
1704 PyObject *list = PyList_New(0);
1705
1706 if (list == NULL)
1707 return NULL;
1708
1709 for (i = j = len - 1; i >= 0; ) {
1710 if (s[i] == ch) {
1711 if (maxcount-- <= 0)
1712 break;
Fredrik Lundh554da412006-05-25 19:19:05 +00001713 SPLIT_APPEND(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001714 j = i = i - 1;
1715 } else
1716 i--;
1717 }
1718 if (j >= -1) {
Fredrik Lundh554da412006-05-25 19:19:05 +00001719 SPLIT_APPEND(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001720 }
Fredrik Lundh554da412006-05-25 19:19:05 +00001721 if (PyList_Reverse(list) < 0)
1722 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001723 return list;
1724
1725 onError:
1726 Py_DECREF(list);
1727 return NULL;
1728}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001729
1730PyDoc_STRVAR(rsplit__doc__,
1731"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1732\n\
1733Return a list of the words in the string S, using sep as the\n\
1734delimiter string, starting at the end of the string and working\n\
1735to the front. If maxsplit is given, at most maxsplit splits are\n\
1736done. If sep is not specified or is None, any whitespace string\n\
1737is a separator.");
1738
1739static PyObject *
1740string_rsplit(PyStringObject *self, PyObject *args)
1741{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001742 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1743 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001744 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001745 const char *s = PyString_AS_STRING(self), *sub;
1746 PyObject *list, *item, *subobj = Py_None;
1747
Martin v. Löwis9c830762006-04-13 08:37:17 +00001748 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001749 return NULL;
1750 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001751 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001752 if (subobj == Py_None)
1753 return rsplit_whitespace(s, len, maxsplit);
1754 if (PyString_Check(subobj)) {
1755 sub = PyString_AS_STRING(subobj);
1756 n = PyString_GET_SIZE(subobj);
1757 }
1758#ifdef Py_USING_UNICODE
1759 else if (PyUnicode_Check(subobj))
1760 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1761#endif
1762 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1763 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001764
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001765 if (n == 0) {
1766 PyErr_SetString(PyExc_ValueError, "empty separator");
1767 return NULL;
1768 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001769 else if (n == 1)
1770 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001771
1772 list = PyList_New(0);
1773 if (list == NULL)
1774 return NULL;
1775
1776 j = len;
1777 i = j - n;
1778 while (i >= 0) {
1779 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1780 if (maxsplit-- <= 0)
1781 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001782 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001783 if (item == NULL)
1784 goto fail;
1785 err = PyList_Insert(list, 0, item);
1786 Py_DECREF(item);
1787 if (err < 0)
1788 goto fail;
1789 j = i;
1790 i -= n;
1791 }
1792 else
1793 i--;
1794 }
1795 item = PyString_FromStringAndSize(s, j);
1796 if (item == NULL)
1797 goto fail;
1798 err = PyList_Insert(list, 0, item);
1799 Py_DECREF(item);
1800 if (err < 0)
1801 goto fail;
1802
1803 return list;
1804
1805 fail:
1806 Py_DECREF(list);
1807 return NULL;
1808}
1809
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001811PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812"S.join(sequence) -> string\n\
1813\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001814Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001815sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816
1817static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001818string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819{
1820 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001821 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001824 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001825 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001826 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001827 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828
Tim Peters19fe14e2001-01-19 03:03:47 +00001829 seq = PySequence_Fast(orig, "");
1830 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001831 return NULL;
1832 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001833
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001834 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001835 if (seqlen == 0) {
1836 Py_DECREF(seq);
1837 return PyString_FromString("");
1838 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001840 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001841 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1842 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001843 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001844 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001845 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001847
Raymond Hettinger674f2412004-08-23 23:23:54 +00001848 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001849 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001850 * Do a pre-pass to figure out the total amount of space we'll
1851 * need (sz), see whether any argument is absurd, and defer to
1852 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001853 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001854 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001855 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001856 item = PySequence_Fast_GET_ITEM(seq, i);
1857 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001858#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001859 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001860 /* Defer to Unicode join.
1861 * CAUTION: There's no gurantee that the
1862 * original sequence can be iterated over
1863 * again, so we must pass seq here.
1864 */
1865 PyObject *result;
1866 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001867 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001868 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001869 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001870#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001871 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001872 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001873 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001874 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001875 Py_DECREF(seq);
1876 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001877 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001878 sz += PyString_GET_SIZE(item);
1879 if (i != 0)
1880 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001881 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001882 PyErr_SetString(PyExc_OverflowError,
1883 "join() is too long for a Python string");
1884 Py_DECREF(seq);
1885 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001887 }
1888
1889 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001890 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001891 if (res == NULL) {
1892 Py_DECREF(seq);
1893 return NULL;
1894 }
1895
1896 /* Catenate everything. */
1897 p = PyString_AS_STRING(res);
1898 for (i = 0; i < seqlen; ++i) {
1899 size_t n;
1900 item = PySequence_Fast_GET_ITEM(seq, i);
1901 n = PyString_GET_SIZE(item);
1902 memcpy(p, PyString_AS_STRING(item), n);
1903 p += n;
1904 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001905 memcpy(p, sep, seplen);
1906 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001907 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001909
Jeremy Hylton49048292000-07-11 03:28:17 +00001910 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912}
1913
Tim Peters52e155e2001-06-16 05:42:57 +00001914PyObject *
1915_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001916{
Tim Petersa7259592001-06-16 05:11:17 +00001917 assert(sep != NULL && PyString_Check(sep));
1918 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001919 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001920}
1921
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001922static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001923string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001924{
1925 if (*end > len)
1926 *end = len;
1927 else if (*end < 0)
1928 *end += len;
1929 if (*end < 0)
1930 *end = 0;
1931 if (*start < 0)
1932 *start += len;
1933 if (*start < 0)
1934 *start = 0;
1935}
1936
Martin v. Löwis18e16552006-02-15 17:27:45 +00001937static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001938string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001940 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001941 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001942 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944
Martin v. Löwis18e16552006-02-15 17:27:45 +00001945 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001946 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001947 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948 return -2;
1949 if (PyString_Check(subobj)) {
1950 sub = PyString_AS_STRING(subobj);
1951 n = PyString_GET_SIZE(subobj);
1952 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001953#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001954 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001955 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001956#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001957 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958 return -2;
1959
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001960 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001962#ifdef USE_FAST
1963 if (n == 0)
1964 return (dir > 0) ? i : last;
1965 if (dir > 0) {
1966 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1967 FAST_SEARCH);
1968 if (pos < 0)
1969 return pos;
1970 return pos + i;
1971 }
1972#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001973 if (dir > 0) {
1974 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001976 last -= n;
1977 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001978 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001979 return (long)i;
1980 }
1981 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001982 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001983
Guido van Rossum4c08d552000-03-10 22:55:18 +00001984 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001985 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001986 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001987 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001988 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001989 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001990
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991 return -1;
1992}
1993
1994
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001995PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996"S.find(sub [,start [,end]]) -> int\n\
1997\n\
1998Return the lowest index in S where substring sub is found,\n\
1999such that sub is contained within s[start,end]. Optional\n\
2000arguments start and end are interpreted as in slice notation.\n\
2001\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002002Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003
2004static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002005string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 if (result == -2)
2009 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002010 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011}
2012
2013
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002014PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015"S.index(sub [,start [,end]]) -> int\n\
2016\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002017Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018
2019static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002020string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002022 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023 if (result == -2)
2024 return NULL;
2025 if (result == -1) {
2026 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002027 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028 return NULL;
2029 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002030 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031}
2032
2033
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002034PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035"S.rfind(sub [,start [,end]]) -> int\n\
2036\n\
2037Return the highest index in S where substring sub is found,\n\
2038such that sub is contained within s[start,end]. Optional\n\
2039arguments start and end are interpreted as in slice notation.\n\
2040\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002041Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042
2043static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002044string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002046 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047 if (result == -2)
2048 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002049 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002050}
2051
2052
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002053PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054"S.rindex(sub [,start [,end]]) -> int\n\
2055\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057
2058static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002059string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002061 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062 if (result == -2)
2063 return NULL;
2064 if (result == -1) {
2065 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002066 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067 return NULL;
2068 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002069 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070}
2071
2072
2073static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002074do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2075{
2076 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002077 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002078 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002079 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2080 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081
2082 i = 0;
2083 if (striptype != RIGHTSTRIP) {
2084 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2085 i++;
2086 }
2087 }
2088
2089 j = len;
2090 if (striptype != LEFTSTRIP) {
2091 do {
2092 j--;
2093 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2094 j++;
2095 }
2096
2097 if (i == 0 && j == len && PyString_CheckExact(self)) {
2098 Py_INCREF(self);
2099 return (PyObject*)self;
2100 }
2101 else
2102 return PyString_FromStringAndSize(s+i, j-i);
2103}
2104
2105
2106static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002107do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108{
2109 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002110 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112 i = 0;
2113 if (striptype != RIGHTSTRIP) {
2114 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2115 i++;
2116 }
2117 }
2118
2119 j = len;
2120 if (striptype != LEFTSTRIP) {
2121 do {
2122 j--;
2123 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2124 j++;
2125 }
2126
Tim Peters8fa5dd02001-09-12 02:18:30 +00002127 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 Py_INCREF(self);
2129 return (PyObject*)self;
2130 }
2131 else
2132 return PyString_FromStringAndSize(s+i, j-i);
2133}
2134
2135
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002136static PyObject *
2137do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2138{
2139 PyObject *sep = NULL;
2140
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002141 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002142 return NULL;
2143
2144 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002145 if (PyString_Check(sep))
2146 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002147#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002148 else if (PyUnicode_Check(sep)) {
2149 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2150 PyObject *res;
2151 if (uniself==NULL)
2152 return NULL;
2153 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2154 striptype, sep);
2155 Py_DECREF(uniself);
2156 return res;
2157 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002158#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002159 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002160#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002161 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002162#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002163 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002164#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002165 STRIPNAME(striptype));
2166 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002167 }
2168
2169 return do_strip(self, striptype);
2170}
2171
2172
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002173PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002174"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175\n\
2176Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002177whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002178If chars is given and not None, remove characters in chars instead.\n\
2179If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180
2181static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002182string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002184 if (PyTuple_GET_SIZE(args) == 0)
2185 return do_strip(self, BOTHSTRIP); /* Common case */
2186 else
2187 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188}
2189
2190
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002191PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002192"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002194Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002195If chars is given and not None, remove characters in chars instead.\n\
2196If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197
2198static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002199string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002201 if (PyTuple_GET_SIZE(args) == 0)
2202 return do_strip(self, LEFTSTRIP); /* Common case */
2203 else
2204 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205}
2206
2207
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002208PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002209"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002211Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002212If chars is given and not None, remove characters in chars instead.\n\
2213If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214
2215static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002216string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002218 if (PyTuple_GET_SIZE(args) == 0)
2219 return do_strip(self, RIGHTSTRIP); /* Common case */
2220 else
2221 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222}
2223
2224
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002225PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226"S.lower() -> string\n\
2227\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002230/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2231#ifndef _tolower
2232#define _tolower tolower
2233#endif
2234
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002236string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002238 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002239 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002240 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002242 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002243 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002245
2246 s = PyString_AS_STRING(newobj);
2247
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002248 memcpy(s, PyString_AS_STRING(self), n);
2249
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002251 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002252 if (isupper(c))
2253 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002255
Anthony Baxtera6286212006-04-11 07:42:36 +00002256 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257}
2258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002259PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260"S.upper() -> string\n\
2261\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002262Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002264#ifndef _toupper
2265#define _toupper toupper
2266#endif
2267
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002269string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002271 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002272 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002273 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002275 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002276 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002278
2279 s = PyString_AS_STRING(newobj);
2280
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002281 memcpy(s, PyString_AS_STRING(self), n);
2282
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002283 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002284 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002285 if (islower(c))
2286 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002288
Anthony Baxtera6286212006-04-11 07:42:36 +00002289 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290}
2291
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002292PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293"S.title() -> string\n\
2294\n\
2295Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002296characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297
2298static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002299string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300{
2301 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002302 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002304 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305
Anthony Baxtera6286212006-04-11 07:42:36 +00002306 newobj = PyString_FromStringAndSize(NULL, n);
2307 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002309 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002310 for (i = 0; i < n; i++) {
2311 int c = Py_CHARMASK(*s++);
2312 if (islower(c)) {
2313 if (!previous_is_cased)
2314 c = toupper(c);
2315 previous_is_cased = 1;
2316 } else if (isupper(c)) {
2317 if (previous_is_cased)
2318 c = tolower(c);
2319 previous_is_cased = 1;
2320 } else
2321 previous_is_cased = 0;
2322 *s_new++ = c;
2323 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002324 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002325}
2326
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002327PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328"S.capitalize() -> string\n\
2329\n\
2330Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002331capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332
2333static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002334string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335{
2336 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002337 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002338 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339
Anthony Baxtera6286212006-04-11 07:42:36 +00002340 newobj = PyString_FromStringAndSize(NULL, n);
2341 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002343 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344 if (0 < n) {
2345 int c = Py_CHARMASK(*s++);
2346 if (islower(c))
2347 *s_new = toupper(c);
2348 else
2349 *s_new = c;
2350 s_new++;
2351 }
2352 for (i = 1; i < n; i++) {
2353 int c = Py_CHARMASK(*s++);
2354 if (isupper(c))
2355 *s_new = tolower(c);
2356 else
2357 *s_new = c;
2358 s_new++;
2359 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002360 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361}
2362
2363
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002364PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365"S.count(sub[, start[, end]]) -> int\n\
2366\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002367Return the number of non-overlapping occurrences of substring sub in\n\
2368string S[start:end]. Optional arguments start and end are interpreted\n\
2369as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370
2371static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002372string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002374 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002375 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002376 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002377 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379
Guido van Rossumc6821402000-05-08 14:08:05 +00002380 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2381 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002383
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 if (PyString_Check(subobj)) {
2385 sub = PyString_AS_STRING(subobj);
2386 n = PyString_GET_SIZE(subobj);
2387 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002388#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002389 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002390 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002391 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2392 if (count == -1)
2393 return NULL;
2394 else
2395 return PyInt_FromLong((long) count);
2396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2399 return NULL;
2400
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002401 string_adjust_indices(&i, &last, len);
2402
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403 m = last + 1 - n;
2404 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002405 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406
Fredrik Lundhaf722372006-05-25 17:55:31 +00002407#ifdef USE_FAST
2408 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2409 if (r < 0)
2410 r = 0; /* no match */
2411#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412 r = 0;
2413 while (i < m) {
Fredrik Lundhaf722372006-05-25 17:55:31 +00002414 const char *t
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415 if (!memcmp(s+i, sub, n)) {
2416 r++;
2417 i += n;
2418 } else {
2419 i++;
2420 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002421 if (i >= m)
2422 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002423 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002424 if (t == NULL)
2425 break;
2426 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002428#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002429 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430}
2431
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002432PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433"S.swapcase() -> string\n\
2434\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002436converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437
2438static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002439string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440{
2441 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002442 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002443 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444
Anthony Baxtera6286212006-04-11 07:42:36 +00002445 newobj = PyString_FromStringAndSize(NULL, n);
2446 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002447 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002448 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 for (i = 0; i < n; i++) {
2450 int c = Py_CHARMASK(*s++);
2451 if (islower(c)) {
2452 *s_new = toupper(c);
2453 }
2454 else if (isupper(c)) {
2455 *s_new = tolower(c);
2456 }
2457 else
2458 *s_new = c;
2459 s_new++;
2460 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002461 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462}
2463
2464
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002465PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002466"S.translate(table [,deletechars]) -> string\n\
2467\n\
2468Return a copy of the string S, where all characters occurring\n\
2469in the optional argument deletechars are removed, and the\n\
2470remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002471translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472
2473static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002474string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002476 register char *input, *output;
2477 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002478 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002479 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002480 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002481 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482 PyObject *result;
2483 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002486 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002487 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002489
2490 if (PyString_Check(tableobj)) {
2491 table1 = PyString_AS_STRING(tableobj);
2492 tablen = PyString_GET_SIZE(tableobj);
2493 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002494#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002495 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002496 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002497 parameter; instead a mapping to None will cause characters
2498 to be deleted. */
2499 if (delobj != NULL) {
2500 PyErr_SetString(PyExc_TypeError,
2501 "deletions are implemented differently for unicode");
2502 return NULL;
2503 }
2504 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2505 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002506#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002507 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509
Martin v. Löwis00b61272002-12-12 20:03:19 +00002510 if (tablen != 256) {
2511 PyErr_SetString(PyExc_ValueError,
2512 "translation table must be 256 characters long");
2513 return NULL;
2514 }
2515
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516 if (delobj != NULL) {
2517 if (PyString_Check(delobj)) {
2518 del_table = PyString_AS_STRING(delobj);
2519 dellen = PyString_GET_SIZE(delobj);
2520 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002521#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522 else if (PyUnicode_Check(delobj)) {
2523 PyErr_SetString(PyExc_TypeError,
2524 "deletions are implemented differently for unicode");
2525 return NULL;
2526 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002527#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2529 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 }
2531 else {
2532 del_table = NULL;
2533 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002534 }
2535
2536 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002537 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002538 result = PyString_FromStringAndSize((char *)NULL, inlen);
2539 if (result == NULL)
2540 return NULL;
2541 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002542 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002543
2544 if (dellen == 0) {
2545 /* If no deletions are required, use faster code */
2546 for (i = inlen; --i >= 0; ) {
2547 c = Py_CHARMASK(*input++);
2548 if (Py_CHARMASK((*output++ = table[c])) != c)
2549 changed = 1;
2550 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002551 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552 return result;
2553 Py_DECREF(result);
2554 Py_INCREF(input_obj);
2555 return input_obj;
2556 }
2557
2558 for (i = 0; i < 256; i++)
2559 trans_table[i] = Py_CHARMASK(table[i]);
2560
2561 for (i = 0; i < dellen; i++)
2562 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2563
2564 for (i = inlen; --i >= 0; ) {
2565 c = Py_CHARMASK(*input++);
2566 if (trans_table[c] != -1)
2567 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2568 continue;
2569 changed = 1;
2570 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002571 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572 Py_DECREF(result);
2573 Py_INCREF(input_obj);
2574 return input_obj;
2575 }
2576 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002577 if (inlen > 0)
2578 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002579 return result;
2580}
2581
2582
/* Search directions accepted by findstring() and countstring().
   REVERSE is parenthesized so it stays a single operand wherever the
   macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Py_STRING_MATCH(target, offset, pattern, length)
 *
 * Non-zero when the `length` bytes of `target` starting at `offset`
 * equal the first `length` bytes of `pattern`.  The first and last bytes
 * are compared directly before memcmp() checks the interior.
 *
 * Don't call if length < 2: the interior length is computed as length-2.
 * Every argument is evaluated more than once, so arguments must not have
 * side effects.
 */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[offset] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* findchar(target, target_len, c)
 *
 * Pointer to the first occurrence of `c` in the `target_len` bytes at
 * `target`, or NULL when there is none (thin wrapper around memchr()).
 */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2596
2597/* String ops must return a string. */
2598/* If the object is subclass of string, create a copy */
2599static PyStringObject *
2600return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002601{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002602 if (PyString_CheckExact(self)) {
2603 Py_INCREF(self);
2604 return self;
2605 }
2606 return (PyStringObject *)PyString_FromStringAndSize(
2607 PyString_AS_STRING(self),
2608 PyString_GET_SIZE(self));
2609}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002611static Py_ssize_t
2612countchar(char *target, int target_len, char c)
2613{
2614 Py_ssize_t count=0;
2615 char *start=target;
2616 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002618 while ( (start=findchar(start, end-start, c)) != NULL ) {
2619 count++;
2620 start += 1;
2621 }
2622
2623 return count;
2624}
2625
2626static Py_ssize_t
2627findstring(char *target, Py_ssize_t target_len,
2628 char *pattern, Py_ssize_t pattern_len,
2629 Py_ssize_t start,
2630 Py_ssize_t end,
2631 int direction)
2632{
2633 if (start < 0) {
2634 start += target_len;
2635 if (start < 0)
2636 start = 0;
2637 }
2638 if (end > target_len) {
2639 end = target_len;
2640 } else if (end < 0) {
2641 end += target_len;
2642 if (end < 0)
2643 end = 0;
2644 }
2645
2646 /* zero-length substrings always match at the first attempt */
2647 if (pattern_len == 0)
2648 return (direction > 0) ? start : end;
2649
2650 end -= pattern_len;
2651
2652 if (direction < 0) {
2653 for (; end >= start; end--)
2654 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2655 return end;
2656 } else {
2657 for (; start <= end; start++)
2658 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2659 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002660 }
2661 return -1;
2662}
2663
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002664Py_ssize_t
2665countstring(char *target, Py_ssize_t target_len,
2666 char *pattern, Py_ssize_t pattern_len,
2667 Py_ssize_t start,
2668 Py_ssize_t end,
2669 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002670{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002671 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002672
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002673 if (start < 0) {
2674 start += target_len;
2675 if (start < 0)
2676 start = 0;
2677 }
2678 if (end > target_len) {
2679 end = target_len;
2680 } else if (end < 0) {
2681 end += target_len;
2682 if (end < 0)
2683 end = 0;
2684 }
2685
2686 /* zero-length substrings match everywhere */
2687 if (pattern_len == 0)
2688 return target_len+1;
2689
2690 end -= pattern_len;
2691
2692 if (direction < 0) {
2693 for (; end >= start; end--)
2694 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2695 count++;
2696 end -= pattern_len-1;
2697 }
2698 } else {
2699 for (; start <= end; start++)
2700 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2701 count++;
2702 start += pattern_len-1;
2703 }
2704 }
2705 return count;
2706}
2707
2708
/* Algorithms for different cases of string replacement */
2710
2711/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2712static PyStringObject *
2713replace_interleave(PyStringObject *self,
2714 PyStringObject *to,
2715 Py_ssize_t maxcount)
2716{
2717 char *self_s, *to_s, *result_s;
2718 Py_ssize_t self_len, to_len, result_len;
2719 Py_ssize_t count, i, product;
2720 PyStringObject *result;
2721
2722 self_len = PyString_GET_SIZE(self);
2723 to_len = PyString_GET_SIZE(to);
2724
2725 /* 1 at the end plus 1 after every character */
2726 count = self_len+1;
2727 if (maxcount < count)
2728 count = maxcount;
2729
2730 /* Check for overflow */
2731 /* result_len = count * to_len + self_len; */
2732 product = count * to_len;
2733 if (product / to_len != count) {
2734 PyErr_SetString(PyExc_OverflowError,
2735 "replace string is too long");
2736 return NULL;
2737 }
2738 result_len = product + self_len;
2739 if (result_len < 0) {
2740 PyErr_SetString(PyExc_OverflowError,
2741 "replace string is too long");
2742 return NULL;
2743 }
2744
2745 if (! (result = (PyStringObject *)
2746 PyString_FromStringAndSize(NULL, result_len)) )
2747 return NULL;
2748
2749 self_s = PyString_AS_STRING(self);
2750 to_s = PyString_AS_STRING(to);
2751 to_len = PyString_GET_SIZE(to);
2752 result_s = PyString_AS_STRING(result);
2753
2754 /* TODO: special case single character, which doesn't need memcpy */
2755
2756 /* Lay the first one down (guaranteed this will occur) */
2757 memcpy(result_s, to_s, to_len);
2758 result_s += to_len;
2759 count -= 1;
2760
2761 for (i=0; i<count; i++) {
2762 *result_s++ = *self_s++;
2763 memcpy(result_s, to_s, to_len);
2764 result_s += to_len;
2765 }
2766
2767 /* Copy the rest of the original string */
2768 memcpy(result_s, self_s, self_len-i);
2769
2770 return result;
2771}
2772
2773/* Special case for deleting a single character */
2774/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2775static PyStringObject *
2776replace_delete_single_character(PyStringObject *self,
2777 char from_c, Py_ssize_t maxcount)
2778{
2779 char *self_s, *result_s;
2780 char *start, *next, *end;
2781 Py_ssize_t self_len, result_len;
2782 Py_ssize_t count;
2783 PyStringObject *result;
2784
2785 self_len = PyString_GET_SIZE(self);
2786 self_s = PyString_AS_STRING(self);
2787
2788 count = countchar(self_s, self_len, from_c);
2789 if (count == 0) {
2790 return return_self(self);
2791 }
2792 if (count > maxcount)
2793 count = maxcount;
2794
2795 result_len = self_len - count; /* from_len == 1 */
2796 assert(result_len>=0);
2797
2798 if ( (result = (PyStringObject *)
2799 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2800 return NULL;
2801 result_s = PyString_AS_STRING(result);
2802
2803 start = self_s;
2804 end = self_s + self_len;
2805 while (count-- > 0) {
2806 next = findchar(start, end-start, from_c);
2807 if (next == NULL)
2808 break;
2809 memcpy(result_s, start, next-start);
2810 result_s += (next-start);
2811 start = next+1;
2812 }
2813 memcpy(result_s, start, end-start);
2814
2815 return result;
2816}
2817
2818/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2819
2820static PyStringObject *
2821replace_delete_substring(PyStringObject *self, PyStringObject *from,
2822 Py_ssize_t maxcount) {
2823 char *self_s, *from_s, *result_s;
2824 char *start, *next, *end;
2825 Py_ssize_t self_len, from_len, result_len;
2826 Py_ssize_t count, offset;
2827 PyStringObject *result;
2828
2829 self_len = PyString_GET_SIZE(self);
2830 self_s = PyString_AS_STRING(self);
2831 from_len = PyString_GET_SIZE(from);
2832 from_s = PyString_AS_STRING(from);
2833
2834 count = countstring(self_s, self_len,
2835 from_s, from_len,
2836 0, self_len, 1);
2837
2838 if (count > maxcount)
2839 count = maxcount;
2840
2841 if (count == 0) {
2842 /* no matches */
2843 return return_self(self);
2844 }
2845
2846 result_len = self_len - (count * from_len);
2847 assert (result_len>=0);
2848
2849 if ( (result = (PyStringObject *)
2850 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2851 return NULL;
2852
2853 result_s = PyString_AS_STRING(result);
2854
2855 start = self_s;
2856 end = self_s + self_len;
2857 while (count-- > 0) {
2858 offset = findstring(start, end-start,
2859 from_s, from_len,
2860 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002861 if (offset == -1)
2862 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002863 next = start + offset;
2864
2865 memcpy(result_s, start, next-start);
2866
2867 result_s += (next-start);
2868 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002869 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002870 memcpy(result_s, start, end-start);
2871 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002872}
2873
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2875static PyStringObject *
2876replace_single_character_in_place(PyStringObject *self,
2877 char from_c, char to_c,
2878 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002879{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002880 char *self_s, *result_s, *start, *end, *next;
2881 Py_ssize_t self_len;
2882 PyStringObject *result;
2883
2884 /* The result string will be the same size */
2885 self_s = PyString_AS_STRING(self);
2886 self_len = PyString_GET_SIZE(self);
2887
2888 next = findchar(self_s, self_len, from_c);
2889
2890 if (next == NULL) {
2891 /* No matches; return the original string */
2892 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002893 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894
2895 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002896 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002897 if (result == NULL)
2898 return NULL;
2899 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002900 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901
2902 /* change everything in-place, starting with this one */
2903 start = result_s + (next-self_s);
2904 *start = to_c;
2905 start++;
2906 end = result_s + self_len;
2907
2908 while (--maxcount > 0) {
2909 next = findchar(start, end-start, from_c);
2910 if (next == NULL)
2911 break;
2912 *next = to_c;
2913 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002914 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002915
2916 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002917}
2918
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2920static PyStringObject *
2921replace_substring_in_place(PyStringObject *self,
2922 PyStringObject *from,
2923 PyStringObject *to,
2924 Py_ssize_t maxcount)
2925{
2926 char *result_s, *start, *end;
2927 char *self_s, *from_s, *to_s;
2928 Py_ssize_t self_len, from_len, offset;
2929 PyStringObject *result;
2930
2931 /* The result string will be the same size */
2932
2933 self_s = PyString_AS_STRING(self);
2934 self_len = PyString_GET_SIZE(self);
2935
2936 from_s = PyString_AS_STRING(from);
2937 from_len = PyString_GET_SIZE(from);
2938 to_s = PyString_AS_STRING(to);
2939
2940 offset = findstring(self_s, self_len,
2941 from_s, from_len,
2942 0, self_len, FORWARD);
2943
2944 if (offset == -1) {
2945 /* No matches; return the original string */
2946 return return_self(self);
2947 }
2948
2949 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002950 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002951 if (result == NULL)
2952 return NULL;
2953 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002954 memcpy(result_s, self_s, self_len);
2955
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002956
2957 /* change everything in-place, starting with this one */
2958 start = result_s + offset;
2959 memcpy(start, to_s, from_len);
2960 start += from_len;
2961 end = result_s + self_len;
2962
2963 while ( --maxcount > 0) {
2964 offset = findstring(start, end-start,
2965 from_s, from_len,
2966 0, end-start, FORWARD);
2967 if (offset==-1)
2968 break;
2969 memcpy(start+offset, to_s, from_len);
2970 start += offset+from_len;
2971 }
2972
2973 return result;
2974}
2975
2976/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2977static PyStringObject *
2978replace_single_character(PyStringObject *self,
2979 char from_c,
2980 PyStringObject *to,
2981 Py_ssize_t maxcount)
2982{
2983 char *self_s, *to_s, *result_s;
2984 char *start, *next, *end;
2985 Py_ssize_t self_len, to_len, result_len;
2986 Py_ssize_t count, product;
2987 PyStringObject *result;
2988
2989 self_s = PyString_AS_STRING(self);
2990 self_len = PyString_GET_SIZE(self);
2991
2992 count = countchar(self_s, self_len, from_c);
2993 if (count > maxcount)
2994 count = maxcount;
2995
2996 if (count == 0) {
2997 /* no matches, return unchanged */
2998 return return_self(self);
2999 }
3000
3001 to_s = PyString_AS_STRING(to);
3002 to_len = PyString_GET_SIZE(to);
3003
3004 /* use the difference between current and new, hence the "-1" */
3005 /* result_len = self_len + count * (to_len-1) */
3006 product = count * (to_len-1);
3007 if (product / (to_len-1) != count) {
3008 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3009 return NULL;
3010 }
3011 result_len = self_len + product;
3012 if (result_len < 0) {
3013 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3014 return NULL;
3015 }
3016
3017 if ( (result = (PyStringObject *)
3018 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3019 return NULL;
3020 result_s = PyString_AS_STRING(result);
3021
3022 start = self_s;
3023 end = self_s + self_len;
3024 while (count-- > 0) {
3025 next = findchar(start, end-start, from_c);
3026 if (next == NULL)
3027 break;
3028
3029 if (next == start) {
3030 /* replace with the 'to' */
3031 memcpy(result_s, to_s, to_len);
3032 result_s += to_len;
3033 start += 1;
3034 } else {
3035 /* copy the unchanged old then the 'to' */
3036 memcpy(result_s, start, next-start);
3037 result_s += (next-start);
3038 memcpy(result_s, to_s, to_len);
3039 result_s += to_len;
3040 start = next+1;
3041 }
3042 }
3043 /* Copy the remainder of the remaining string */
3044 memcpy(result_s, start, end-start);
3045
3046 return result;
3047}
3048
3049/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
3050static PyStringObject *
3051replace_substring(PyStringObject *self,
3052 PyStringObject *from,
3053 PyStringObject *to,
3054 Py_ssize_t maxcount) {
3055 char *self_s, *from_s, *to_s, *result_s;
3056 char *start, *next, *end;
3057 Py_ssize_t self_len, from_len, to_len, result_len;
3058 Py_ssize_t count, offset, product;
3059 PyStringObject *result;
3060
3061 self_s = PyString_AS_STRING(self);
3062 self_len = PyString_GET_SIZE(self);
3063 from_s = PyString_AS_STRING(from);
3064 from_len = PyString_GET_SIZE(from);
3065
3066 count = countstring(self_s, self_len,
3067 from_s, from_len,
3068 0, self_len, FORWARD);
3069 if (count > maxcount)
3070 count = maxcount;
3071
3072 if (count == 0) {
3073 /* no matches, return unchanged */
3074 return return_self(self);
3075 }
3076
3077 to_s = PyString_AS_STRING(to);
3078 to_len = PyString_GET_SIZE(to);
3079
3080 /* Check for overflow */
3081 /* result_len = self_len + count * (to_len-from_len) */
3082 product = count * (to_len-from_len);
3083 if (product / (to_len-from_len) != count) {
3084 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3085 return NULL;
3086 }
3087 result_len = self_len + product;
3088 if (result_len < 0) {
3089 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3090 return NULL;
3091 }
3092
3093 if ( (result = (PyStringObject *)
3094 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3095 return NULL;
3096 result_s = PyString_AS_STRING(result);
3097
3098 start = self_s;
3099 end = self_s + self_len;
3100 while (count-- > 0) {
3101 offset = findstring(start, end-start,
3102 from_s, from_len,
3103 0, end-start, FORWARD);
3104 if (offset == -1)
3105 break;
3106 next = start+offset;
3107 if (next == start) {
3108 /* replace with the 'to' */
3109 memcpy(result_s, to_s, to_len);
3110 result_s += to_len;
3111 start += from_len;
3112 } else {
3113 /* copy the unchanged old then the 'to' */
3114 memcpy(result_s, start, next-start);
3115 result_s += (next-start);
3116 memcpy(result_s, to_s, to_len);
3117 result_s += to_len;
3118 start = next+from_len;
3119 }
3120 }
3121 /* Copy the remainder of the remaining string */
3122 memcpy(result_s, start, end-start);
3123
3124 return result;
3125}
3126
3127
3128static PyStringObject *
3129replace(PyStringObject *self,
3130 PyStringObject *from,
3131 PyStringObject *to,
3132 Py_ssize_t maxcount)
3133{
3134 Py_ssize_t from_len, to_len;
3135
3136 if (maxcount < 0) {
3137 maxcount = PY_SSIZE_T_MAX;
3138 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3139 /* nothing to do; return the original string */
3140 return return_self(self);
3141 }
3142
3143 from_len = PyString_GET_SIZE(from);
3144 to_len = PyString_GET_SIZE(to);
3145
3146 if (maxcount == 0 ||
3147 (from_len == 0 && to_len == 0)) {
3148 /* nothing to do; return the original string */
3149 return return_self(self);
3150 }
3151
3152 /* Handle zero-length special cases */
3153
3154 if (from_len == 0) {
3155 /* insert the 'to' string everywhere. */
3156 /* >>> "Python".replace("", ".") */
3157 /* '.P.y.t.h.o.n.' */
3158 return replace_interleave(self, to, maxcount);
3159 }
3160
3161 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3162 /* point for an empty self string to generate a non-empty string */
3163 /* Special case so the remaining code always gets a non-empty string */
3164 if (PyString_GET_SIZE(self) == 0) {
3165 return return_self(self);
3166 }
3167
3168 if (to_len == 0) {
3169 /* delete all occurances of 'from' string */
3170 if (from_len == 1) {
3171 return replace_delete_single_character(
3172 self, PyString_AS_STRING(from)[0], maxcount);
3173 } else {
3174 return replace_delete_substring(self, from, maxcount);
3175 }
3176 }
3177
3178 /* Handle special case where both strings have the same length */
3179
3180 if (from_len == to_len) {
3181 if (from_len == 1) {
3182 return replace_single_character_in_place(
3183 self,
3184 PyString_AS_STRING(from)[0],
3185 PyString_AS_STRING(to)[0],
3186 maxcount);
3187 } else {
3188 return replace_substring_in_place(
3189 self, from, to, maxcount);
3190 }
3191 }
3192
3193 /* Otherwise use the more generic algorithms */
3194 if (from_len == 1) {
3195 return replace_single_character(self, PyString_AS_STRING(from)[0],
3196 to, maxcount);
3197 } else {
3198 /* len('from')>=2, len('to')>=1 */
3199 return replace_substring(self, from, to, maxcount);
3200 }
3201}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003202
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003203PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003204"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003205\n\
3206Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003207old replaced by new. If the optional argument count is\n\
3208given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003209
3210static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003211string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003212{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003213 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003214 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003215 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003216 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003217
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003218 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003219 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003221 if (PyString_Check(from)) {
3222 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003223 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003224#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003225 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003226 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003227 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003228#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003229 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003230 return NULL;
3231
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003232 if (PyString_Check(to)) {
3233 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003234 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003235#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003236 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003237 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003238 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003239#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003240 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003241 return NULL;
3242
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003243 return (PyObject *)replace((PyStringObject *) self,
3244 (PyStringObject *) from,
3245 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003246}
3247
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003248/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003249
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003250PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003251"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003252\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003253Return True if S starts with the specified prefix, False otherwise.\n\
3254With optional start, test S beginning at that position.\n\
3255With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003256
3257static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003258string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003259{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003260 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003261 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003262 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003263 Py_ssize_t plen;
3264 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003265 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003266 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003267
Guido van Rossumc6821402000-05-08 14:08:05 +00003268 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3269 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270 return NULL;
3271 if (PyString_Check(subobj)) {
3272 prefix = PyString_AS_STRING(subobj);
3273 plen = PyString_GET_SIZE(subobj);
3274 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003275#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003276 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003277 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003278 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003279 subobj, start, end, -1);
3280 if (rc == -1)
3281 return NULL;
3282 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003283 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003284 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003285#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003286 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003287 return NULL;
3288
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003289 string_adjust_indices(&start, &end, len);
3290
3291 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003292 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003293
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003294 if (end-start >= plen)
3295 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3296 else
3297 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003298}
3299
3300
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003301PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003302"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003303\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003304Return True if S ends with the specified suffix, False otherwise.\n\
3305With optional start, test S beginning at that position.\n\
3306With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003307
3308static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003309string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003310{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003311 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003312 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003313 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003314 Py_ssize_t slen;
3315 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003316 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003317 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003318
Guido van Rossumc6821402000-05-08 14:08:05 +00003319 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3320 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003321 return NULL;
3322 if (PyString_Check(subobj)) {
3323 suffix = PyString_AS_STRING(subobj);
3324 slen = PyString_GET_SIZE(subobj);
3325 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003326#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003327 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003328 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003329 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003330 subobj, start, end, +1);
3331 if (rc == -1)
3332 return NULL;
3333 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003334 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003335 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003336#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003337 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003338 return NULL;
3339
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003340 string_adjust_indices(&start, &end, len);
3341
3342 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003343 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003344
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003345 if (end-slen > start)
3346 start = end - slen;
3347 if (end-start >= slen)
3348 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3349 else
3350 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003351}
3352
3353
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003354PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003355"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003356\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003357Encodes S using the codec registered for encoding. encoding defaults\n\
3358to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003359handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003360a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3361'xmlcharrefreplace' as well as any other name registered with\n\
3362codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003363
3364static PyObject *
3365string_encode(PyStringObject *self, PyObject *args)
3366{
3367 char *encoding = NULL;
3368 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003369 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003370
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003371 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3372 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003373 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003374 if (v == NULL)
3375 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003376 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3377 PyErr_Format(PyExc_TypeError,
3378 "encoder did not return a string/unicode object "
3379 "(type=%.400s)",
3380 v->ob_type->tp_name);
3381 Py_DECREF(v);
3382 return NULL;
3383 }
3384 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003385
3386 onError:
3387 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003388}
3389
3390
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003391PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003392"S.decode([encoding[,errors]]) -> object\n\
3393\n\
3394Decodes S using the codec registered for encoding. encoding defaults\n\
3395to the default encoding. errors may be given to set a different error\n\
3396handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003397a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3398as well as any other name registerd with codecs.register_error that is\n\
3399able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003400
3401static PyObject *
3402string_decode(PyStringObject *self, PyObject *args)
3403{
3404 char *encoding = NULL;
3405 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003406 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003407
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003408 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3409 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003410 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003411 if (v == NULL)
3412 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003413 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3414 PyErr_Format(PyExc_TypeError,
3415 "decoder did not return a string/unicode object "
3416 "(type=%.400s)",
3417 v->ob_type->tp_name);
3418 Py_DECREF(v);
3419 return NULL;
3420 }
3421 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003422
3423 onError:
3424 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003425}
3426
3427
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003428PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003429"S.expandtabs([tabsize]) -> string\n\
3430\n\
3431Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003432If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433
3434static PyObject*
3435string_expandtabs(PyStringObject *self, PyObject *args)
3436{
3437 const char *e, *p;
3438 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003439 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440 PyObject *u;
3441 int tabsize = 8;
3442
3443 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3444 return NULL;
3445
Thomas Wouters7e474022000-07-16 12:04:32 +00003446 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003447 i = j = 0;
3448 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3449 for (p = PyString_AS_STRING(self); p < e; p++)
3450 if (*p == '\t') {
3451 if (tabsize > 0)
3452 j += tabsize - (j % tabsize);
3453 }
3454 else {
3455 j++;
3456 if (*p == '\n' || *p == '\r') {
3457 i += j;
3458 j = 0;
3459 }
3460 }
3461
3462 /* Second pass: create output string and fill it */
3463 u = PyString_FromStringAndSize(NULL, i + j);
3464 if (!u)
3465 return NULL;
3466
3467 j = 0;
3468 q = PyString_AS_STRING(u);
3469
3470 for (p = PyString_AS_STRING(self); p < e; p++)
3471 if (*p == '\t') {
3472 if (tabsize > 0) {
3473 i = tabsize - (j % tabsize);
3474 j += i;
3475 while (i--)
3476 *q++ = ' ';
3477 }
3478 }
3479 else {
3480 j++;
3481 *q++ = *p;
3482 if (*p == '\n' || *p == '\r')
3483 j = 0;
3484 }
3485
3486 return u;
3487}
3488
Tim Peters8fa5dd02001-09-12 02:18:30 +00003489static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003490pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003491{
3492 PyObject *u;
3493
3494 if (left < 0)
3495 left = 0;
3496 if (right < 0)
3497 right = 0;
3498
Tim Peters8fa5dd02001-09-12 02:18:30 +00003499 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500 Py_INCREF(self);
3501 return (PyObject *)self;
3502 }
3503
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003504 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505 left + PyString_GET_SIZE(self) + right);
3506 if (u) {
3507 if (left)
3508 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003509 memcpy(PyString_AS_STRING(u) + left,
3510 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003511 PyString_GET_SIZE(self));
3512 if (right)
3513 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3514 fill, right);
3515 }
3516
3517 return u;
3518}
3519
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003520PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003521"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003522"\n"
3523"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003524"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003525
3526static PyObject *
3527string_ljust(PyStringObject *self, PyObject *args)
3528{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003529 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003530 char fillchar = ' ';
3531
Thomas Wouters4abb3662006-04-19 14:50:15 +00003532 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003533 return NULL;
3534
Tim Peters8fa5dd02001-09-12 02:18:30 +00003535 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003536 Py_INCREF(self);
3537 return (PyObject*) self;
3538 }
3539
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003540 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003541}
3542
3543
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003544PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003545"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003546"\n"
3547"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003548"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549
3550static PyObject *
3551string_rjust(PyStringObject *self, PyObject *args)
3552{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003553 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003554 char fillchar = ' ';
3555
Thomas Wouters4abb3662006-04-19 14:50:15 +00003556 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557 return NULL;
3558
Tim Peters8fa5dd02001-09-12 02:18:30 +00003559 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003560 Py_INCREF(self);
3561 return (PyObject*) self;
3562 }
3563
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003564 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003565}
3566
3567
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003568PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003569"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003570"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003571"Return S centered in a string of length width. Padding is\n"
3572"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003573
3574static PyObject *
3575string_center(PyStringObject *self, PyObject *args)
3576{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003577 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003578 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003579 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003580
Thomas Wouters4abb3662006-04-19 14:50:15 +00003581 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003582 return NULL;
3583
Tim Peters8fa5dd02001-09-12 02:18:30 +00003584 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003585 Py_INCREF(self);
3586 return (PyObject*) self;
3587 }
3588
3589 marg = width - PyString_GET_SIZE(self);
3590 left = marg / 2 + (marg & width & 1);
3591
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003592 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003593}
3594
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003595PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003596"S.zfill(width) -> string\n"
3597"\n"
3598"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003599"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003600
3601static PyObject *
3602string_zfill(PyStringObject *self, PyObject *args)
3603{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003604 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003605 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003606 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003607 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003608
Thomas Wouters4abb3662006-04-19 14:50:15 +00003609 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003610 return NULL;
3611
3612 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003613 if (PyString_CheckExact(self)) {
3614 Py_INCREF(self);
3615 return (PyObject*) self;
3616 }
3617 else
3618 return PyString_FromStringAndSize(
3619 PyString_AS_STRING(self),
3620 PyString_GET_SIZE(self)
3621 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003622 }
3623
3624 fill = width - PyString_GET_SIZE(self);
3625
3626 s = pad(self, fill, 0, '0');
3627
3628 if (s == NULL)
3629 return NULL;
3630
3631 p = PyString_AS_STRING(s);
3632 if (p[fill] == '+' || p[fill] == '-') {
3633 /* move sign to beginning of string */
3634 p[0] = p[fill];
3635 p[fill] = '0';
3636 }
3637
3638 return (PyObject*) s;
3639}
3640
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003641PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003642"S.isspace() -> bool\n\
3643\n\
3644Return True if all characters in S are whitespace\n\
3645and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646
3647static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003648string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649{
Fred Drakeba096332000-07-09 07:04:36 +00003650 register const unsigned char *p
3651 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003652 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654 /* Shortcut for single character strings */
3655 if (PyString_GET_SIZE(self) == 1 &&
3656 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003659 /* Special case for empty strings */
3660 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003662
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663 e = p + PyString_GET_SIZE(self);
3664 for (; p < e; p++) {
3665 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003666 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669}
3670
3671
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003672PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003673"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003674\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003675Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003676and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003677
3678static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003679string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003680{
Fred Drakeba096332000-07-09 07:04:36 +00003681 register const unsigned char *p
3682 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003683 register const unsigned char *e;
3684
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003685 /* Shortcut for single character strings */
3686 if (PyString_GET_SIZE(self) == 1 &&
3687 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003689
3690 /* Special case for empty strings */
3691 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003693
3694 e = p + PyString_GET_SIZE(self);
3695 for (; p < e; p++) {
3696 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003697 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003698 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003700}
3701
3702
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003703PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003704"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003705\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003706Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003707and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003708
3709static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003710string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003711{
Fred Drakeba096332000-07-09 07:04:36 +00003712 register const unsigned char *p
3713 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003714 register const unsigned char *e;
3715
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003716 /* Shortcut for single character strings */
3717 if (PyString_GET_SIZE(self) == 1 &&
3718 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003719 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003720
3721 /* Special case for empty strings */
3722 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003724
3725 e = p + PyString_GET_SIZE(self);
3726 for (; p < e; p++) {
3727 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003729 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003730 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003731}
3732
3733
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003734PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003735"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003737Return True if all characters in S are digits\n\
3738and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739
3740static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003741string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742{
Fred Drakeba096332000-07-09 07:04:36 +00003743 register const unsigned char *p
3744 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003745 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 /* Shortcut for single character strings */
3748 if (PyString_GET_SIZE(self) == 1 &&
3749 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003750 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003752 /* Special case for empty strings */
3753 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003755
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756 e = p + PyString_GET_SIZE(self);
3757 for (; p < e; p++) {
3758 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003761 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003762}
3763
3764
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003765PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003766"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003768Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003769at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770
3771static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003772string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773{
Fred Drakeba096332000-07-09 07:04:36 +00003774 register const unsigned char *p
3775 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003776 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 int cased;
3778
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779 /* Shortcut for single character strings */
3780 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003781 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003783 /* Special case for empty strings */
3784 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003785 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003786
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787 e = p + PyString_GET_SIZE(self);
3788 cased = 0;
3789 for (; p < e; p++) {
3790 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003791 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792 else if (!cased && islower(*p))
3793 cased = 1;
3794 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003795 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796}
3797
3798
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003799PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003800"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003801\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003802Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003803at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804
3805static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003806string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003807{
Fred Drakeba096332000-07-09 07:04:36 +00003808 register const unsigned char *p
3809 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003810 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811 int cased;
3812
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813 /* Shortcut for single character strings */
3814 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003815 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003817 /* Special case for empty strings */
3818 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003819 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003820
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 e = p + PyString_GET_SIZE(self);
3822 cased = 0;
3823 for (; p < e; p++) {
3824 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003825 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 else if (!cased && isupper(*p))
3827 cased = 1;
3828 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003829 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830}
3831
3832
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003833PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003834"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003835\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003836Return True if S is a titlecased string and there is at least one\n\
3837character in S, i.e. uppercase characters may only follow uncased\n\
3838characters and lowercase characters only cased ones. Return False\n\
3839otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003840
3841static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003842string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003843{
Fred Drakeba096332000-07-09 07:04:36 +00003844 register const unsigned char *p
3845 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003846 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847 int cased, previous_is_cased;
3848
Guido van Rossum4c08d552000-03-10 22:55:18 +00003849 /* Shortcut for single character strings */
3850 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003851 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003852
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003853 /* Special case for empty strings */
3854 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003855 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003856
Guido van Rossum4c08d552000-03-10 22:55:18 +00003857 e = p + PyString_GET_SIZE(self);
3858 cased = 0;
3859 previous_is_cased = 0;
3860 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003861 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003862
3863 if (isupper(ch)) {
3864 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003865 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003866 previous_is_cased = 1;
3867 cased = 1;
3868 }
3869 else if (islower(ch)) {
3870 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003871 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003872 previous_is_cased = 1;
3873 cased = 1;
3874 }
3875 else
3876 previous_is_cased = 0;
3877 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003878 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003879}
3880
3881
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003882PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003883"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003884\n\
3885Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003886Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003887is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003888
Guido van Rossum4c08d552000-03-10 22:55:18 +00003889static PyObject*
3890string_splitlines(PyStringObject *self, PyObject *args)
3891{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003892 register Py_ssize_t i;
3893 register Py_ssize_t j;
3894 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003895 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003896 PyObject *list;
3897 PyObject *str;
3898 char *data;
3899
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003900 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003901 return NULL;
3902
3903 data = PyString_AS_STRING(self);
3904 len = PyString_GET_SIZE(self);
3905
Guido van Rossum4c08d552000-03-10 22:55:18 +00003906 list = PyList_New(0);
3907 if (!list)
3908 goto onError;
3909
3910 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003911 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003912
Guido van Rossum4c08d552000-03-10 22:55:18 +00003913 /* Find a line and append it */
3914 while (i < len && data[i] != '\n' && data[i] != '\r')
3915 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003916
3917 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003918 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003919 if (i < len) {
3920 if (data[i] == '\r' && i + 1 < len &&
3921 data[i+1] == '\n')
3922 i += 2;
3923 else
3924 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003925 if (keepends)
3926 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003927 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003928 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003929 j = i;
3930 }
3931 if (j < len) {
3932 SPLIT_APPEND(data, j, len);
3933 }
3934
3935 return list;
3936
3937 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003938 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003939 return NULL;
3940}
3941
3942#undef SPLIT_APPEND
3943
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003944static PyObject *
3945string_getnewargs(PyStringObject *v)
3946{
3947 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3948}
3949
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003950
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003951static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003952string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003953 /* Counterparts of the obsolete stropmodule functions; except
3954 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003955 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3956 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003957 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003958 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3959 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003960 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3961 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3962 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3963 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3964 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3965 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3966 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003967 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3968 capitalize__doc__},
3969 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3970 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3971 endswith__doc__},
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00003972 {"partition", (PyCFunction)string_partition, METH_VARARGS,
3973 partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003974 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3975 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3976 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3977 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3978 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3979 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3980 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3981 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3982 startswith__doc__},
3983 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3984 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3985 swapcase__doc__},
3986 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3987 translate__doc__},
3988 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3989 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3990 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3991 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3992 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3993 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3994 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3995 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3996 expandtabs__doc__},
3997 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3998 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003999 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004000 {NULL, NULL} /* sentinel */
4001};
4002
Jeremy Hylton938ace62002-07-17 16:30:39 +00004003static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00004004str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4005
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004006static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004007string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004008{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004009 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004010 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011
Guido van Rossumae960af2001-08-30 03:11:59 +00004012 if (type != &PyString_Type)
4013 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004014 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4015 return NULL;
4016 if (x == NULL)
4017 return PyString_FromString("");
4018 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004019}
4020
Guido van Rossumae960af2001-08-30 03:11:59 +00004021static PyObject *
4022str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4023{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004024 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004025 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004026
4027 assert(PyType_IsSubtype(type, &PyString_Type));
4028 tmp = string_new(&PyString_Type, args, kwds);
4029 if (tmp == NULL)
4030 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004031 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004032 n = PyString_GET_SIZE(tmp);
4033 pnew = type->tp_alloc(type, n);
4034 if (pnew != NULL) {
4035 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004036 ((PyStringObject *)pnew)->ob_shash =
4037 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004038 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004039 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004040 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004041 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004042}
4043
Guido van Rossumcacfc072002-05-24 19:01:59 +00004044static PyObject *
4045basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4046{
4047 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004048 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004049 return NULL;
4050}
4051
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004052static PyObject *
4053string_mod(PyObject *v, PyObject *w)
4054{
4055 if (!PyString_Check(v)) {
4056 Py_INCREF(Py_NotImplemented);
4057 return Py_NotImplemented;
4058 }
4059 return PyString_Format(v, w);
4060}
4061
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004062PyDoc_STRVAR(basestring_doc,
4063"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004064
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004065static PyNumberMethods string_as_number = {
4066 0, /*nb_add*/
4067 0, /*nb_subtract*/
4068 0, /*nb_multiply*/
4069 0, /*nb_divide*/
4070 string_mod, /*nb_remainder*/
4071};
4072
4073
Guido van Rossumcacfc072002-05-24 19:01:59 +00004074PyTypeObject PyBaseString_Type = {
4075 PyObject_HEAD_INIT(&PyType_Type)
4076 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004077 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004078 0,
4079 0,
4080 0, /* tp_dealloc */
4081 0, /* tp_print */
4082 0, /* tp_getattr */
4083 0, /* tp_setattr */
4084 0, /* tp_compare */
4085 0, /* tp_repr */
4086 0, /* tp_as_number */
4087 0, /* tp_as_sequence */
4088 0, /* tp_as_mapping */
4089 0, /* tp_hash */
4090 0, /* tp_call */
4091 0, /* tp_str */
4092 0, /* tp_getattro */
4093 0, /* tp_setattro */
4094 0, /* tp_as_buffer */
4095 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4096 basestring_doc, /* tp_doc */
4097 0, /* tp_traverse */
4098 0, /* tp_clear */
4099 0, /* tp_richcompare */
4100 0, /* tp_weaklistoffset */
4101 0, /* tp_iter */
4102 0, /* tp_iternext */
4103 0, /* tp_methods */
4104 0, /* tp_members */
4105 0, /* tp_getset */
4106 &PyBaseObject_Type, /* tp_base */
4107 0, /* tp_dict */
4108 0, /* tp_descr_get */
4109 0, /* tp_descr_set */
4110 0, /* tp_dictoffset */
4111 0, /* tp_init */
4112 0, /* tp_alloc */
4113 basestring_new, /* tp_new */
4114 0, /* tp_free */
4115};
4116
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004117PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004118"str(object) -> string\n\
4119\n\
4120Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004121If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004122
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004123PyTypeObject PyString_Type = {
4124 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004125 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004126 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004127 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004128 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004129 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004130 (printfunc)string_print, /* tp_print */
4131 0, /* tp_getattr */
4132 0, /* tp_setattr */
4133 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004134 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004135 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004136 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004137 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004138 (hashfunc)string_hash, /* tp_hash */
4139 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004140 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004141 PyObject_GenericGetAttr, /* tp_getattro */
4142 0, /* tp_setattro */
4143 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004144 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004145 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004146 string_doc, /* tp_doc */
4147 0, /* tp_traverse */
4148 0, /* tp_clear */
4149 (richcmpfunc)string_richcompare, /* tp_richcompare */
4150 0, /* tp_weaklistoffset */
4151 0, /* tp_iter */
4152 0, /* tp_iternext */
4153 string_methods, /* tp_methods */
4154 0, /* tp_members */
4155 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004156 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004157 0, /* tp_dict */
4158 0, /* tp_descr_get */
4159 0, /* tp_descr_set */
4160 0, /* tp_dictoffset */
4161 0, /* tp_init */
4162 0, /* tp_alloc */
4163 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004164 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004165};
4166
4167void
Fred Drakeba096332000-07-09 07:04:36 +00004168PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004169{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004170 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004171 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004172 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004173 if (w == NULL || !PyString_Check(*pv)) {
4174 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004175 *pv = NULL;
4176 return;
4177 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004178 v = string_concat((PyStringObject *) *pv, w);
4179 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004180 *pv = v;
4181}
4182
Guido van Rossum013142a1994-08-30 08:19:36 +00004183void
Fred Drakeba096332000-07-09 07:04:36 +00004184PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004185{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004186 PyString_Concat(pv, w);
4187 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004188}
4189
4190
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004191/* The following function breaks the notion that strings are immutable:
4192 it changes the size of a string. We get away with this only if there
4193 is only one module referencing the object. You can also think of it
4194 as creating a new string object and destroying the old one, only
4195 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004196 already be known to some other part of the code...
4197 Note that if there's not enough memory to resize the string, the original
4198 string object at *pv is deallocated, *pv is set to NULL, an "out of
4199 memory" exception is set, and -1 is returned. Else (on success) 0 is
4200 returned, and the value in *pv may or may not be the same as on input.
4201 As always, an extra byte is allocated for a trailing \0 byte (newsize
4202 does *not* include that), and a trailing \0 byte is stored.
4203*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004204
4205int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004206_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004207{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004208 register PyObject *v;
4209 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004210 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004211 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4212 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004213 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004214 Py_DECREF(v);
4215 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004216 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004217 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004218 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004219 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004220 _Py_ForgetReference(v);
4221 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004222 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004223 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004224 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004225 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004226 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004227 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004228 _Py_NewReference(*pv);
4229 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004230 sv->ob_size = newsize;
4231 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004232 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004233 return 0;
4234}
Guido van Rossume5372401993-03-16 12:15:04 +00004235
4236/* Helpers for formatstring */
4237
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004238static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004239getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004240{
Thomas Wouters977485d2006-02-16 15:59:12 +00004241 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004242 if (argidx < arglen) {
4243 (*p_argidx)++;
4244 if (arglen < 0)
4245 return args;
4246 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004247 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004248 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004249 PyErr_SetString(PyExc_TypeError,
4250 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004251 return NULL;
4252}
4253
/* Format flag bits, one per flag character. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4266
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004267static int
Fred Drakeba096332000-07-09 07:04:36 +00004268formatfloat(char *buf, size_t buflen, int flags,
4269 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004270{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004271 /* fmt = '%#.' + `prec` + `type`
4272 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004273 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004274 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004275 x = PyFloat_AsDouble(v);
4276 if (x == -1.0 && PyErr_Occurred()) {
4277 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004278 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004279 }
Guido van Rossume5372401993-03-16 12:15:04 +00004280 if (prec < 0)
4281 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004282 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4283 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004284 /* Worst case length calc to ensure no buffer overrun:
4285
4286 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004287 fmt = %#.<prec>g
4288 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004289 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004290 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004291
4292 'f' formats:
4293 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4294 len = 1 + 50 + 1 + prec = 52 + prec
4295
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004296 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004297 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004298
4299 */
4300 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4301 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004302 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004303 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004304 return -1;
4305 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004306 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4307 (flags&F_ALT) ? "#" : "",
4308 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004309 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004310 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004311}
4312
Tim Peters38fd5b62000-09-21 05:43:11 +00004313/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4314 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4315 * Python's regular ints.
4316 * Return value: a new PyString*, or NULL if error.
4317 * . *pbuf is set to point into it,
4318 * *plen set to the # of chars following that.
4319 * Caller must decref it when done using pbuf.
4320 * The string starting at *pbuf is of the form
4321 * "-"? ("0x" | "0X")? digit+
4322 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004323 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004324 * There will be at least prec digits, zero-filled on the left if
4325 * necessary to get that many.
4326 * val object to be converted
4327 * flags bitmask of format flags; only F_ALT is looked at
4328 * prec minimum number of digits; 0-fill on left if needed
4329 * type a character in [duoxX]; u acts the same as d
4330 *
4331 * CAUTION: o, x and X conversions on regular ints can never
4332 * produce a '-' sign, but can for Python's unbounded ints.
4333 */
4334PyObject*
4335_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4336 char **pbuf, int *plen)
4337{
4338 PyObject *result = NULL;
4339 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004340 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004341 int sign; /* 1 if '-', else 0 */
4342 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004343 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004344 int numdigits; /* len == numnondigits + numdigits */
4345 int numnondigits = 0;
4346
4347 switch (type) {
4348 case 'd':
4349 case 'u':
4350 result = val->ob_type->tp_str(val);
4351 break;
4352 case 'o':
4353 result = val->ob_type->tp_as_number->nb_oct(val);
4354 break;
4355 case 'x':
4356 case 'X':
4357 numnondigits = 2;
4358 result = val->ob_type->tp_as_number->nb_hex(val);
4359 break;
4360 default:
4361 assert(!"'type' not in [duoxX]");
4362 }
4363 if (!result)
4364 return NULL;
4365
4366 /* To modify the string in-place, there can only be one reference. */
4367 if (result->ob_refcnt != 1) {
4368 PyErr_BadInternalCall();
4369 return NULL;
4370 }
4371 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004372 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004373 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004374 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4375 return NULL;
4376 }
4377 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004378 if (buf[len-1] == 'L') {
4379 --len;
4380 buf[len] = '\0';
4381 }
4382 sign = buf[0] == '-';
4383 numnondigits += sign;
4384 numdigits = len - numnondigits;
4385 assert(numdigits > 0);
4386
Tim Petersfff53252001-04-12 18:38:48 +00004387 /* Get rid of base marker unless F_ALT */
4388 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004389 /* Need to skip 0x, 0X or 0. */
4390 int skipped = 0;
4391 switch (type) {
4392 case 'o':
4393 assert(buf[sign] == '0');
4394 /* If 0 is only digit, leave it alone. */
4395 if (numdigits > 1) {
4396 skipped = 1;
4397 --numdigits;
4398 }
4399 break;
4400 case 'x':
4401 case 'X':
4402 assert(buf[sign] == '0');
4403 assert(buf[sign + 1] == 'x');
4404 skipped = 2;
4405 numnondigits -= 2;
4406 break;
4407 }
4408 if (skipped) {
4409 buf += skipped;
4410 len -= skipped;
4411 if (sign)
4412 buf[0] = '-';
4413 }
4414 assert(len == numnondigits + numdigits);
4415 assert(numdigits > 0);
4416 }
4417
4418 /* Fill with leading zeroes to meet minimum width. */
4419 if (prec > numdigits) {
4420 PyObject *r1 = PyString_FromStringAndSize(NULL,
4421 numnondigits + prec);
4422 char *b1;
4423 if (!r1) {
4424 Py_DECREF(result);
4425 return NULL;
4426 }
4427 b1 = PyString_AS_STRING(r1);
4428 for (i = 0; i < numnondigits; ++i)
4429 *b1++ = *buf++;
4430 for (i = 0; i < prec - numdigits; i++)
4431 *b1++ = '0';
4432 for (i = 0; i < numdigits; i++)
4433 *b1++ = *buf++;
4434 *b1 = '\0';
4435 Py_DECREF(result);
4436 result = r1;
4437 buf = PyString_AS_STRING(result);
4438 len = numnondigits + prec;
4439 }
4440
4441 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004442 if (type == 'X') {
4443 /* Need to convert all lower case letters to upper case.
4444 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004445 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004446 if (buf[i] >= 'a' && buf[i] <= 'x')
4447 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004448 }
4449 *pbuf = buf;
4450 *plen = len;
4451 return result;
4452}
4453
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004454static int
Fred Drakeba096332000-07-09 07:04:36 +00004455formatint(char *buf, size_t buflen, int flags,
4456 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004457{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004458 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004459 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4460 + 1 + 1 = 24 */
4461 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004462 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004463 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004464
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004465 x = PyInt_AsLong(v);
4466 if (x == -1 && PyErr_Occurred()) {
4467 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004468 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004469 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004470 if (x < 0 && type == 'u') {
4471 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004472 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004473 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4474 sign = "-";
4475 else
4476 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004477 if (prec < 0)
4478 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004479
4480 if ((flags & F_ALT) &&
4481 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004482 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004483 * of issues that cause pain:
4484 * - when 0 is being converted, the C standard leaves off
4485 * the '0x' or '0X', which is inconsistent with other
4486 * %#x/%#X conversions and inconsistent with Python's
4487 * hex() function
4488 * - there are platforms that violate the standard and
4489 * convert 0 with the '0x' or '0X'
4490 * (Metrowerks, Compaq Tru64)
4491 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004492 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004493 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004494 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004495 * We can achieve the desired consistency by inserting our
4496 * own '0x' or '0X' prefix, and substituting %x/%X in place
4497 * of %#x/%#X.
4498 *
4499 * Note that this is the same approach as used in
4500 * formatint() in unicodeobject.c
4501 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004502 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4503 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004504 }
4505 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004506 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4507 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004508 prec, type);
4509 }
4510
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004511 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4512 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004513 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004514 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004515 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004516 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004517 return -1;
4518 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004519 if (sign[0])
4520 PyOS_snprintf(buf, buflen, fmt, -x);
4521 else
4522 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004523 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004524}
4525
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004526static int
Fred Drakeba096332000-07-09 07:04:36 +00004527formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004528{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004529 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004530 if (PyString_Check(v)) {
4531 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004532 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004533 }
4534 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004535 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004536 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004537 }
4538 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004539 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004540}
4541
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004551
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004552PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004553PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004554{
4555 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004556 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004557 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004558 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004559 PyObject *result, *orig_args;
4560#ifdef Py_USING_UNICODE
4561 PyObject *v, *w;
4562#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004563 PyObject *dict = NULL;
4564 if (format == NULL || !PyString_Check(format) || args == NULL) {
4565 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004566 return NULL;
4567 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004568 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004569 fmt = PyString_AS_STRING(format);
4570 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004571 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004572 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004573 if (result == NULL)
4574 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004575 res = PyString_AsString(result);
4576 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004577 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004578 argidx = 0;
4579 }
4580 else {
4581 arglen = -1;
4582 argidx = -2;
4583 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004584 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4585 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004586 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004587 while (--fmtcnt >= 0) {
4588 if (*fmt != '%') {
4589 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004590 rescnt = fmtcnt + 100;
4591 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004592 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004593 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004594 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004595 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004596 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004597 }
4598 *res++ = *fmt++;
4599 }
4600 else {
4601 /* Got a format specifier */
4602 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004603 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004604 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004605 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004606 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004607 PyObject *v = NULL;
4608 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004609 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004610 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004611 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004612 char formatbuf[FORMATBUFLEN];
4613 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004614#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004615 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004616 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004617#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004618
Guido van Rossumda9c2711996-12-05 21:58:58 +00004619 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004620 if (*fmt == '(') {
4621 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004622 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004623 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004624 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004625
4626 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004627 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004628 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004629 goto error;
4630 }
4631 ++fmt;
4632 --fmtcnt;
4633 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004634 /* Skip over balanced parentheses */
4635 while (pcount > 0 && --fmtcnt >= 0) {
4636 if (*fmt == ')')
4637 --pcount;
4638 else if (*fmt == '(')
4639 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004640 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004641 }
4642 keylen = fmt - keystart - 1;
4643 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004644 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004645 "incomplete format key");
4646 goto error;
4647 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004648 key = PyString_FromStringAndSize(keystart,
4649 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004650 if (key == NULL)
4651 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004652 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004653 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004654 args_owned = 0;
4655 }
4656 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004657 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004658 if (args == NULL) {
4659 goto error;
4660 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004661 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004662 arglen = -1;
4663 argidx = -2;
4664 }
Guido van Rossume5372401993-03-16 12:15:04 +00004665 while (--fmtcnt >= 0) {
4666 switch (c = *fmt++) {
4667 case '-': flags |= F_LJUST; continue;
4668 case '+': flags |= F_SIGN; continue;
4669 case ' ': flags |= F_BLANK; continue;
4670 case '#': flags |= F_ALT; continue;
4671 case '0': flags |= F_ZERO; continue;
4672 }
4673 break;
4674 }
4675 if (c == '*') {
4676 v = getnextarg(args, arglen, &argidx);
4677 if (v == NULL)
4678 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004679 if (!PyInt_Check(v)) {
4680 PyErr_SetString(PyExc_TypeError,
4681 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004682 goto error;
4683 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004684 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004685 if (width < 0) {
4686 flags |= F_LJUST;
4687 width = -width;
4688 }
Guido van Rossume5372401993-03-16 12:15:04 +00004689 if (--fmtcnt >= 0)
4690 c = *fmt++;
4691 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004692 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004693 width = c - '0';
4694 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004695 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004696 if (!isdigit(c))
4697 break;
4698 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004699 PyErr_SetString(
4700 PyExc_ValueError,
4701 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004702 goto error;
4703 }
4704 width = width*10 + (c - '0');
4705 }
4706 }
4707 if (c == '.') {
4708 prec = 0;
4709 if (--fmtcnt >= 0)
4710 c = *fmt++;
4711 if (c == '*') {
4712 v = getnextarg(args, arglen, &argidx);
4713 if (v == NULL)
4714 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004715 if (!PyInt_Check(v)) {
4716 PyErr_SetString(
4717 PyExc_TypeError,
4718 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004719 goto error;
4720 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004721 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004722 if (prec < 0)
4723 prec = 0;
4724 if (--fmtcnt >= 0)
4725 c = *fmt++;
4726 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004727 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004728 prec = c - '0';
4729 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004730 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004731 if (!isdigit(c))
4732 break;
4733 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004734 PyErr_SetString(
4735 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004736 "prec too big");
4737 goto error;
4738 }
4739 prec = prec*10 + (c - '0');
4740 }
4741 }
4742 } /* prec */
4743 if (fmtcnt >= 0) {
4744 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004745 if (--fmtcnt >= 0)
4746 c = *fmt++;
4747 }
4748 }
4749 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004750 PyErr_SetString(PyExc_ValueError,
4751 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004752 goto error;
4753 }
4754 if (c != '%') {
4755 v = getnextarg(args, arglen, &argidx);
4756 if (v == NULL)
4757 goto error;
4758 }
4759 sign = 0;
4760 fill = ' ';
4761 switch (c) {
4762 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004763 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004764 len = 1;
4765 break;
4766 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004767#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004768 if (PyUnicode_Check(v)) {
4769 fmt = fmt_start;
4770 argidx = argidx_start;
4771 goto unicode;
4772 }
Georg Brandld45014b2005-10-01 17:06:00 +00004773#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004774 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004775#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004776 if (temp != NULL && PyUnicode_Check(temp)) {
4777 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004778 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004779 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004780 goto unicode;
4781 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004782#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004783 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004784 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004785 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004786 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004787 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004788 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004789 if (!PyString_Check(temp)) {
4790 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004791 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004792 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004793 goto error;
4794 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004795 pbuf = PyString_AS_STRING(temp);
4796 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004797 if (prec >= 0 && len > prec)
4798 len = prec;
4799 break;
4800 case 'i':
4801 case 'd':
4802 case 'u':
4803 case 'o':
4804 case 'x':
4805 case 'X':
4806 if (c == 'i')
4807 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004808 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004809 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004810 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004811 prec, c, &pbuf, &ilen);
4812 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004813 if (!temp)
4814 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004815 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004816 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004817 else {
4818 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004819 len = formatint(pbuf,
4820 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004821 flags, prec, c, v);
4822 if (len < 0)
4823 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004824 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004825 }
4826 if (flags & F_ZERO)
4827 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004828 break;
4829 case 'e':
4830 case 'E':
4831 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004832 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004833 case 'g':
4834 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004835 if (c == 'F')
4836 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004837 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004838 len = formatfloat(pbuf, sizeof(formatbuf),
4839 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004840 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004841 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004842 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004843 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004844 fill = '0';
4845 break;
4846 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004847#ifdef Py_USING_UNICODE
4848 if (PyUnicode_Check(v)) {
4849 fmt = fmt_start;
4850 argidx = argidx_start;
4851 goto unicode;
4852 }
4853#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004854 pbuf = formatbuf;
4855 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004856 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004857 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004858 break;
4859 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004860 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004861 "unsupported format character '%c' (0x%x) "
4862 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004863 c, c,
4864 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004865 goto error;
4866 }
4867 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004868 if (*pbuf == '-' || *pbuf == '+') {
4869 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004870 len--;
4871 }
4872 else if (flags & F_SIGN)
4873 sign = '+';
4874 else if (flags & F_BLANK)
4875 sign = ' ';
4876 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004877 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004878 }
4879 if (width < len)
4880 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004881 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004882 reslen -= rescnt;
4883 rescnt = width + fmtcnt + 100;
4884 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004885 if (reslen < 0) {
4886 Py_DECREF(result);
4887 return PyErr_NoMemory();
4888 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004889 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004890 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004891 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004892 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004893 }
4894 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004895 if (fill != ' ')
4896 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004897 rescnt--;
4898 if (width > len)
4899 width--;
4900 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004901 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4902 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004903 assert(pbuf[1] == c);
4904 if (fill != ' ') {
4905 *res++ = *pbuf++;
4906 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004907 }
Tim Petersfff53252001-04-12 18:38:48 +00004908 rescnt -= 2;
4909 width -= 2;
4910 if (width < 0)
4911 width = 0;
4912 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004913 }
4914 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004915 do {
4916 --rescnt;
4917 *res++ = fill;
4918 } while (--width > len);
4919 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004920 if (fill == ' ') {
4921 if (sign)
4922 *res++ = sign;
4923 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004924 (c == 'x' || c == 'X')) {
4925 assert(pbuf[0] == '0');
4926 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004927 *res++ = *pbuf++;
4928 *res++ = *pbuf++;
4929 }
4930 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004931 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004932 res += len;
4933 rescnt -= len;
4934 while (--width >= len) {
4935 --rescnt;
4936 *res++ = ' ';
4937 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004938 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004939 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004940 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004941 goto error;
4942 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004943 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004944 } /* '%' */
4945 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004946 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004947 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004948 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004949 goto error;
4950 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004951 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004952 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004954 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004955 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004956
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004957#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004958 unicode:
4959 if (args_owned) {
4960 Py_DECREF(args);
4961 args_owned = 0;
4962 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004963 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004964 if (PyTuple_Check(orig_args) && argidx > 0) {
4965 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004966 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004967 v = PyTuple_New(n);
4968 if (v == NULL)
4969 goto error;
4970 while (--n >= 0) {
4971 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4972 Py_INCREF(w);
4973 PyTuple_SET_ITEM(v, n, w);
4974 }
4975 args = v;
4976 } else {
4977 Py_INCREF(orig_args);
4978 args = orig_args;
4979 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004980 args_owned = 1;
4981 /* Take what we have of the result and let the Unicode formatting
4982 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004983 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004984 if (_PyString_Resize(&result, rescnt))
4985 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004986 fmtcnt = PyString_GET_SIZE(format) - \
4987 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004988 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4989 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004990 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004991 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004992 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004993 if (v == NULL)
4994 goto error;
4995 /* Paste what we have (result) to what the Unicode formatting
4996 function returned (v) and return the result (or error) */
4997 w = PyUnicode_Concat(result, v);
4998 Py_DECREF(result);
4999 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005000 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005001 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005002#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005003
Guido van Rossume5372401993-03-16 12:15:04 +00005004 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005005 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005006 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005007 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005008 }
Guido van Rossume5372401993-03-16 12:15:04 +00005009 return NULL;
5010}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005011
Guido van Rossum2a61e741997-01-18 07:55:05 +00005012void
Fred Drakeba096332000-07-09 07:04:36 +00005013PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005014{
5015 register PyStringObject *s = (PyStringObject *)(*p);
5016 PyObject *t;
5017 if (s == NULL || !PyString_Check(s))
5018 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005019 /* If it's a string subclass, we don't really know what putting
5020 it in the interned dict might do. */
5021 if (!PyString_CheckExact(s))
5022 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005023 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005024 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005025 if (interned == NULL) {
5026 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005027 if (interned == NULL) {
5028 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005029 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005030 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005031 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005032 t = PyDict_GetItem(interned, (PyObject *)s);
5033 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005034 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005035 Py_DECREF(*p);
5036 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005037 return;
5038 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005039
Armin Rigo79f7ad22004-08-07 19:27:39 +00005040 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005041 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005042 return;
5043 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005044 /* The two references in interned are not counted by refcnt.
5045 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00005046 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005047 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005048}
5049
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005050void
5051PyString_InternImmortal(PyObject **p)
5052{
5053 PyString_InternInPlace(p);
5054 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5055 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5056 Py_INCREF(*p);
5057 }
5058}
5059
Guido van Rossum2a61e741997-01-18 07:55:05 +00005060
5061PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005062PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005063{
5064 PyObject *s = PyString_FromString(cp);
5065 if (s == NULL)
5066 return NULL;
5067 PyString_InternInPlace(&s);
5068 return s;
5069}
5070
Guido van Rossum8cf04761997-08-02 02:57:45 +00005071void
Fred Drakeba096332000-07-09 07:04:36 +00005072PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005073{
5074 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005075 for (i = 0; i < UCHAR_MAX + 1; i++) {
5076 Py_XDECREF(characters[i]);
5077 characters[i] = NULL;
5078 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005079 Py_XDECREF(nullstring);
5080 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005081}
Barry Warsawa903ad982001-02-23 16:40:48 +00005082
Barry Warsawa903ad982001-02-23 16:40:48 +00005083void _Py_ReleaseInternedStrings(void)
5084{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005085 PyObject *keys;
5086 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005087 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005088
5089 if (interned == NULL || !PyDict_Check(interned))
5090 return;
5091 keys = PyDict_Keys(interned);
5092 if (keys == NULL || !PyList_Check(keys)) {
5093 PyErr_Clear();
5094 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005095 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005096
5097 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5098 detector, interned strings are not forcibly deallocated; rather, we
5099 give them their stolen references back, and then clear and DECREF
5100 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005101
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005102 fprintf(stderr, "releasing interned strings\n");
5103 n = PyList_GET_SIZE(keys);
5104 for (i = 0; i < n; i++) {
5105 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5106 switch (s->ob_sstate) {
5107 case SSTATE_NOT_INTERNED:
5108 /* XXX Shouldn't happen */
5109 break;
5110 case SSTATE_INTERNED_IMMORTAL:
5111 s->ob_refcnt += 1;
5112 break;
5113 case SSTATE_INTERNED_MORTAL:
5114 s->ob_refcnt += 2;
5115 break;
5116 default:
5117 Py_FatalError("Inconsistent interned string state.");
5118 }
5119 s->ob_sstate = SSTATE_NOT_INTERNED;
5120 }
5121 Py_DECREF(keys);
5122 PyDict_Clear(interned);
5123 Py_DECREF(interned);
5124 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005125}