blob: b4c89505f7d7fbf780bb28409a3f36c2acf3d0d7 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
/* USE_INLINE is forced off in this file.  XXX - set via configure? */
#undef USE_INLINE

/* LOCAL(type) spells the declaration prefix for a file-local helper that
   returns `type'.  The MSVC flavour is taken from _sre.c. */
#ifndef _MSC_VER
#  ifdef USE_INLINE
#    define LOCAL(type) static inline type
#  else
#    define LOCAL(type) static type
#  endif
#else
#  pragma warning(disable: 4710)
/* fastest possible local call under MSVC */
#  define LOCAL(type) static __inline type __fastcall
#endif
19
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020#ifdef COUNT_ALLOCS
21int null_strings, one_strings;
22#endif
23
Guido van Rossumc0b618a1997-05-02 03:12:38 +000024static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000025static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026
Guido van Rossum45ec02a2002-08-19 21:43:18 +000027/* This dictionary holds all interned strings. Note that references to
28 strings in this dictionary are *not* counted in the string's ob_refcnt.
29 When the interned string reaches a refcnt of 0 the string deallocation
30 function will delete the reference from this dictionary.
31
Tim Petersae1d0c92006-03-17 03:29:34 +000032 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000033 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
34*/
35static PyObject *interned;
36
37
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000065PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000066{
Tim Peters9e897f42001-05-09 07:37:07 +000067 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000068 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 Py_INCREF(op);
74 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000085
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000086 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000087 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000088 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000090 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000092 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (str != NULL)
94 memcpy(op->ob_sval, str, size);
95 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000096 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000104 PyObject *t = (PyObject *)op;
105 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000106 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111}
112
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000114PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115{
Tim Peters62de65b2001-12-06 20:29:32 +0000116 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000117 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000118
119 assert(str != NULL);
120 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000121 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000122 PyErr_SetString(PyExc_OverflowError,
123 "string is too long for a Python string");
124 return NULL;
125 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 if (size == 0 && (op = nullstring) != NULL) {
127#ifdef COUNT_ALLOCS
128 null_strings++;
129#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 Py_INCREF(op);
131 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 }
133 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
134#ifdef COUNT_ALLOCS
135 one_strings++;
136#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000137 Py_INCREF(op);
138 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000140
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000141 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000142 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000143 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000147 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000148 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000149 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000157 PyObject *t = (PyObject *)op;
158 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000159 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000161 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000162 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000164}
165
Barry Warsawdadace02001-08-24 18:32:06 +0000166PyObject *
167PyString_FromFormatV(const char *format, va_list vargs)
168{
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000170 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000171 const char* f;
172 char *s;
173 PyObject* string;
174
Tim Petersc15c4f12001-10-02 21:32:07 +0000175#ifdef VA_LIST_IS_ARRAY
176 memcpy(count, vargs, sizeof(va_list));
177#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000178#ifdef __va_copy
179 __va_copy(count, vargs);
180#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000181 count = vargs;
182#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000183#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
188 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
189 ;
190
Tim Peters8931ff12006-05-13 23:28:20 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000196 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000197
Barry Warsawdadace02001-08-24 18:32:06 +0000198 switch (*f) {
199 case 'c':
200 (void)va_arg(count, int);
201 /* fall through... */
202 case '%':
203 n++;
204 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000205 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000206 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 /* 20 bytes is enough to hold a 64-bit
208 integer. Decimal takes the most space.
209 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000210 n += 20;
211 break;
212 case 's':
213 s = va_arg(count, char*);
214 n += strlen(s);
215 break;
216 case 'p':
217 (void) va_arg(count, int);
218 /* maximum 64-bit pointer representation:
219 * 0xffffffffffffffff
220 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000222 */
223 n += 19;
224 break;
225 default:
226 /* if we stumble upon an unknown
227 formatting code, copy the rest of
228 the format string to the output
229 string. (we cannot just skip the
230 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 n += strlen(p);
233 goto expand;
234 }
235 } else
236 n++;
237 }
238 expand:
239 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000240 /* Since we've analyzed how much space we need for the worst case,
241 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000242 string = PyString_FromStringAndSize(NULL, n);
243 if (!string)
244 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000245
Barry Warsawdadace02001-08-24 18:32:06 +0000246 s = PyString_AsString(string);
247
248 for (f = format; *f; f++) {
249 if (*f == '%') {
250 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000251 Py_ssize_t i;
252 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000253 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000254 /* parse the width.precision part (we're only
255 interested in the precision value, if any) */
256 n = 0;
257 while (isdigit(Py_CHARMASK(*f)))
258 n = (n*10) + *f++ - '0';
259 if (*f == '.') {
260 f++;
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 }
265 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
266 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000267 /* handle the long flag, but only for %ld and %lu.
268 others can be added when necessary. */
269 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000270 longflag = 1;
271 ++f;
272 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000273 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000274 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000275 size_tflag = 1;
276 ++f;
277 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000278
Barry Warsawdadace02001-08-24 18:32:06 +0000279 switch (*f) {
280 case 'c':
281 *s++ = va_arg(vargs, int);
282 break;
283 case 'd':
284 if (longflag)
285 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000286 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
288 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
290 sprintf(s, "%d", va_arg(vargs, int));
291 s += strlen(s);
292 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000293 case 'u':
294 if (longflag)
295 sprintf(s, "%lu",
296 va_arg(vargs, unsigned long));
297 else if (size_tflag)
298 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
299 va_arg(vargs, size_t));
300 else
301 sprintf(s, "%u",
302 va_arg(vargs, unsigned int));
303 s += strlen(s);
304 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000305 case 'i':
306 sprintf(s, "%i", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 'x':
310 sprintf(s, "%x", va_arg(vargs, int));
311 s += strlen(s);
312 break;
313 case 's':
314 p = va_arg(vargs, char*);
315 i = strlen(p);
316 if (n > 0 && i > n)
317 i = n;
318 memcpy(s, p, i);
319 s += i;
320 break;
321 case 'p':
322 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000323 /* %p is ill-defined: ensure leading 0x. */
324 if (s[1] == 'X')
325 s[1] = 'x';
326 else if (s[1] != 'x') {
327 memmove(s+2, s, strlen(s)+1);
328 s[0] = '0';
329 s[1] = 'x';
330 }
Barry Warsawdadace02001-08-24 18:32:06 +0000331 s += strlen(s);
332 break;
333 case '%':
334 *s++ = '%';
335 break;
336 default:
337 strcpy(s, p);
338 s += strlen(s);
339 goto end;
340 }
341 } else
342 *s++ = *f;
343 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000344
Barry Warsawdadace02001-08-24 18:32:06 +0000345 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000347 return string;
348}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000349
Barry Warsawdadace02001-08-24 18:32:06 +0000350PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000351PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000352{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000353 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000354 va_list vargs;
355
356#ifdef HAVE_STDARG_PROTOTYPES
357 va_start(vargs, format);
358#else
359 va_start(vargs);
360#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000361 ret = PyString_FromFormatV(format, vargs);
362 va_end(vargs);
363 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000364}
365
366
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000368 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 const char *encoding,
370 const char *errors)
371{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372 PyObject *v, *str;
373
374 str = PyString_FromStringAndSize(s, size);
375 if (str == NULL)
376 return NULL;
377 v = PyString_AsDecodedString(str, encoding, errors);
378 Py_DECREF(str);
379 return v;
380}
381
382PyObject *PyString_AsDecodedObject(PyObject *str,
383 const char *encoding,
384 const char *errors)
385{
386 PyObject *v;
387
388 if (!PyString_Check(str)) {
389 PyErr_BadArgument();
390 goto onError;
391 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393 if (encoding == NULL) {
394#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#else
397 PyErr_SetString(PyExc_ValueError, "no encoding specified");
398 goto onError;
399#endif
400 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000401
402 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 v = PyCodec_Decode(str, encoding, errors);
404 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000405 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000406
407 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000410 return NULL;
411}
412
413PyObject *PyString_AsDecodedString(PyObject *str,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v;
418
419 v = PyString_AsDecodedObject(str, encoding, errors);
420 if (v == NULL)
421 goto onError;
422
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000423#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000424 /* Convert Unicode to a string using the default encoding */
425 if (PyUnicode_Check(v)) {
426 PyObject *temp = v;
427 v = PyUnicode_AsEncodedString(v, NULL, NULL);
428 Py_DECREF(temp);
429 if (v == NULL)
430 goto onError;
431 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000433 if (!PyString_Check(v)) {
434 PyErr_Format(PyExc_TypeError,
435 "decoder did not return a string object (type=%.400s)",
436 v->ob_type->tp_name);
437 Py_DECREF(v);
438 goto onError;
439 }
440
441 return v;
442
443 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 return NULL;
445}
446
447PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000448 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000453
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000454 str = PyString_FromStringAndSize(s, size);
455 if (str == NULL)
456 return NULL;
457 v = PyString_AsEncodedString(str, encoding, errors);
458 Py_DECREF(str);
459 return v;
460}
461
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 const char *encoding,
464 const char *errors)
465{
466 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000467
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(str)) {
469 PyErr_BadArgument();
470 goto onError;
471 }
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473 if (encoding == NULL) {
474#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#else
477 PyErr_SetString(PyExc_ValueError, "no encoding specified");
478 goto onError;
479#endif
480 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000481
482 /* Encode via the codec registry */
483 v = PyCodec_Encode(str, encoding, errors);
484 if (v == NULL)
485 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
487 return v;
488
489 onError:
490 return NULL;
491}
492
493PyObject *PyString_AsEncodedString(PyObject *str,
494 const char *encoding,
495 const char *errors)
496{
497 PyObject *v;
498
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000499 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000500 if (v == NULL)
501 goto onError;
502
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000503#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000504 /* Convert Unicode to a string using the default encoding */
505 if (PyUnicode_Check(v)) {
506 PyObject *temp = v;
507 v = PyUnicode_AsEncodedString(v, NULL, NULL);
508 Py_DECREF(temp);
509 if (v == NULL)
510 goto onError;
511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000512#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 if (!PyString_Check(v)) {
514 PyErr_Format(PyExc_TypeError,
515 "encoder did not return a string object (type=%.400s)",
516 v->ob_type->tp_name);
517 Py_DECREF(v);
518 goto onError;
519 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000520
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000521 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000522
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000523 onError:
524 return NULL;
525}
526
Guido van Rossum234f9421993-06-17 12:35:49 +0000527static void
Fred Drakeba096332000-07-09 07:04:36 +0000528string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000529{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000530 switch (PyString_CHECK_INTERNED(op)) {
531 case SSTATE_NOT_INTERNED:
532 break;
533
534 case SSTATE_INTERNED_MORTAL:
535 /* revive dead object temporarily for DelItem */
536 op->ob_refcnt = 3;
537 if (PyDict_DelItem(interned, op) != 0)
538 Py_FatalError(
539 "deletion of interned string failed");
540 break;
541
542 case SSTATE_INTERNED_IMMORTAL:
543 Py_FatalError("Immortal interned string died.");
544
545 default:
546 Py_FatalError("Inconsistent interned string state.");
547 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000548 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000549}
550
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000551/* Unescape a backslash-escaped string. If unicode is non-zero,
552 the string is a u-literal. If recode_encoding is non-zero,
553 the string is UTF-8 encoded and should be re-encoded in the
554 specified encoding. */
555
556PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000557 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000559 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000560 const char *recode_encoding)
561{
562 int c;
563 char *p, *buf;
564 const char *end;
565 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000566 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000567 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568 if (v == NULL)
569 return NULL;
570 p = buf = PyString_AsString(v);
571 end = s + len;
572 while (s < end) {
573 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000574 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575#ifdef Py_USING_UNICODE
576 if (recode_encoding && (*s & 0x80)) {
577 PyObject *u, *w;
578 char *r;
579 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000580 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 t = s;
582 /* Decode non-ASCII bytes as UTF-8. */
583 while (t < end && (*t & 0x80)) t++;
584 u = PyUnicode_DecodeUTF8(s, t - s, errors);
585 if(!u) goto failed;
586
587 /* Recode them in target encoding. */
588 w = PyUnicode_AsEncodedString(
589 u, recode_encoding, errors);
590 Py_DECREF(u);
591 if (!w) goto failed;
592
593 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000594 assert(PyString_Check(w));
595 r = PyString_AS_STRING(w);
596 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000597 memcpy(p, r, rn);
598 p += rn;
599 Py_DECREF(w);
600 s = t;
601 } else {
602 *p++ = *s++;
603 }
604#else
605 *p++ = *s++;
606#endif
607 continue;
608 }
609 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000610 if (s==end) {
611 PyErr_SetString(PyExc_ValueError,
612 "Trailing \\ in string");
613 goto failed;
614 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000615 switch (*s++) {
616 /* XXX This assumes ASCII! */
617 case '\n': break;
618 case '\\': *p++ = '\\'; break;
619 case '\'': *p++ = '\''; break;
620 case '\"': *p++ = '\"'; break;
621 case 'b': *p++ = '\b'; break;
622 case 'f': *p++ = '\014'; break; /* FF */
623 case 't': *p++ = '\t'; break;
624 case 'n': *p++ = '\n'; break;
625 case 'r': *p++ = '\r'; break;
626 case 'v': *p++ = '\013'; break; /* VT */
627 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
628 case '0': case '1': case '2': case '3':
629 case '4': case '5': case '6': case '7':
630 c = s[-1] - '0';
631 if ('0' <= *s && *s <= '7') {
632 c = (c<<3) + *s++ - '0';
633 if ('0' <= *s && *s <= '7')
634 c = (c<<3) + *s++ - '0';
635 }
636 *p++ = c;
637 break;
638 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000639 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 && isxdigit(Py_CHARMASK(s[1]))) {
641 unsigned int x = 0;
642 c = Py_CHARMASK(*s);
643 s++;
644 if (isdigit(c))
645 x = c - '0';
646 else if (islower(c))
647 x = 10 + c - 'a';
648 else
649 x = 10 + c - 'A';
650 x = x << 4;
651 c = Py_CHARMASK(*s);
652 s++;
653 if (isdigit(c))
654 x += c - '0';
655 else if (islower(c))
656 x += 10 + c - 'a';
657 else
658 x += 10 + c - 'A';
659 *p++ = x;
660 break;
661 }
662 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000663 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000664 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667 if (strcmp(errors, "replace") == 0) {
668 *p++ = '?';
669 } else if (strcmp(errors, "ignore") == 0)
670 /* do nothing */;
671 else {
672 PyErr_Format(PyExc_ValueError,
673 "decoding error; "
674 "unknown error handling code: %.400s",
675 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#ifndef Py_USING_UNICODE
679 case 'u':
680 case 'U':
681 case 'N':
682 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000683 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 "Unicode escapes not legal "
685 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000686 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 }
688#endif
689 default:
690 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000691 s--;
692 goto non_esc; /* an arbitry number of unescaped
693 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 }
695 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000696 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000698 return v;
699 failed:
700 Py_DECREF(v);
701 return NULL;
702}
703
Martin v. Löwis18e16552006-02-15 17:27:45 +0000704static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705string_getsize(register PyObject *op)
706{
707 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000708 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709 if (PyString_AsStringAndSize(op, &s, &len))
710 return -1;
711 return len;
712}
713
714static /*const*/ char *
715string_getbuffer(register PyObject *op)
716{
717 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000718 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
720 return NULL;
721 return s;
722}
723
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000725PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 if (!PyString_Check(op))
728 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730}
731
732/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000733PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000734{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735 if (!PyString_Check(op))
736 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738}
739
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740int
741PyString_AsStringAndSize(register PyObject *obj,
742 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000743 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000744{
745 if (s == NULL) {
746 PyErr_BadInternalCall();
747 return -1;
748 }
749
750 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000752 if (PyUnicode_Check(obj)) {
753 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
754 if (obj == NULL)
755 return -1;
756 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000757 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000758#endif
759 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_Format(PyExc_TypeError,
761 "expected string or Unicode object, "
762 "%.200s found", obj->ob_type->tp_name);
763 return -1;
764 }
765 }
766
767 *s = PyString_AS_STRING(obj);
768 if (len != NULL)
769 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000770 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000771 PyErr_SetString(PyExc_TypeError,
772 "expected string without null bytes");
773 return -1;
774 }
775 return 0;
776}
777
/* -------------------------------------------------------------------- */
/* Helpers */

/* experimental fast search implementation */
#define USE_FAST

/* XXX - this code is copied from unicodeobject.c.  we really should
   refactor the core implementations (see _sre.c for how this can be
   done), but that'll have to wait -- fredrik */

/* fast search/count implementation, based on a mix between boyer-
   moore and horspool, with a few more bells and whistles on the top.
   for some more background, see: http://effbot.org/stringlib */

/* caveats for callers of fastsearch:
   - it may access s[n]; that is fine for Python's ordinary string types
     but may cause problems if the code is used in other contexts.
   - in count mode it returns -1 if there cannot possibly be a match in
     the target string, and 0 if it has actually checked for matches but
     didn't find any. */

/* values for the `mode' argument of fastsearch */
#define FAST_COUNT 0
#define FAST_SEARCH 1
800
801LOCAL(Py_ssize_t)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +0000802fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode)
Fredrik Lundhaf722372006-05-25 17:55:31 +0000803{
804 long mask;
Fredrik Lundhf2c0dfd2006-05-26 10:27:17 +0000805 Py_ssize_t skip, count = 0;
Fredrik Lundhaf722372006-05-25 17:55:31 +0000806 Py_ssize_t i, j, mlast, w;
807
808 w = n - m;
809
810 if (w < 0)
811 return -1;
812
813 /* look for special cases */
814 if (m <= 1) {
815 if (m <= 0)
816 return -1;
817 /* use special case for 1-character strings */
818 if (mode == FAST_COUNT) {
819 for (i = 0; i < n; i++)
820 if (s[i] == p[0])
821 count++;
822 return count;
823 } else {
824 for (i = 0; i < n; i++)
825 if (s[i] == p[0])
826 return i;
827 }
828 return -1;
829 }
830
831 mlast = m - 1;
832
833 /* create compressed boyer-moore delta 1 table */
834 skip = mlast - 1;
835 /* process pattern[:-1] */
836 for (mask = i = 0; i < mlast; i++) {
837 mask |= (1 << (p[i] & 0x1F));
838 if (p[i] == p[mlast])
839 skip = mlast - i - 1;
840 }
841 /* process pattern[-1] outside the loop */
842 mask |= (1 << (p[mlast] & 0x1F));
843
844 for (i = 0; i <= w; i++) {
845 /* note: using mlast in the skip path slows things down on x86 */
846 if (s[i+m-1] == p[m-1]) {
847 /* candidate match */
848 for (j = 0; j < mlast; j++)
849 if (s[i+j] != p[j])
850 break;
851 if (j == mlast) {
852 /* got a match! */
853 if (mode != FAST_COUNT)
854 return i;
855 count++;
856 i = i + mlast;
857 continue;
858 }
859 /* miss: check if next character is part of pattern */
860 if (!(mask & (1 << (s[i+m] & 0x1F))))
861 i = i + m;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +0000862 else
Fredrik Lundhaf722372006-05-25 17:55:31 +0000863 i = i + skip;
Fredrik Lundhaf722372006-05-25 17:55:31 +0000864 } else {
865 /* skip: check if next character is part of pattern */
866 if (!(mask & (1 << (s[i+m] & 0x1F))))
867 i = i + m;
868 }
869 }
870
871 if (mode != FAST_COUNT)
872 return -1;
873 return count;
874}
875
876/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877/* Methods */
878
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000879static int
Fred Drakeba096332000-07-09 07:04:36 +0000880string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000882 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000883 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000884 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000885
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000886 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000887 if (! PyString_CheckExact(op)) {
888 int ret;
889 /* A str subclass may have its own __str__ method. */
890 op = (PyStringObject *) PyObject_Str((PyObject *)op);
891 if (op == NULL)
892 return -1;
893 ret = string_print(op, fp, flags);
894 Py_DECREF(op);
895 return ret;
896 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000897 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000898#ifdef __VMS
899 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
900#else
901 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
902#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000903 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000905
Thomas Wouters7e474022000-07-16 12:04:32 +0000906 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000907 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000908 if (memchr(op->ob_sval, '\'', op->ob_size) &&
909 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000910 quote = '"';
911
912 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000913 for (i = 0; i < op->ob_size; i++) {
914 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000915 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000916 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000917 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000918 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000919 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000920 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000921 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000922 fprintf(fp, "\\r");
923 else if (c < ' ' || c >= 0x7f)
924 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000925 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000926 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000928 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000929 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930}
931
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000932PyObject *
933PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000935 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000936 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000937 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000938 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000939 PyErr_SetString(PyExc_OverflowError,
940 "string is too large to make repr");
941 }
942 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000944 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 }
946 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000947 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 register char c;
949 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000950 int quote;
951
Thomas Wouters7e474022000-07-16 12:04:32 +0000952 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000953 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000954 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000955 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000956 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000957 quote = '"';
958
Tim Peters9161c8b2001-12-03 01:55:38 +0000959 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000960 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000961 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000962 /* There's at least enough room for a hex escape
963 and a closing quote. */
964 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000966 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000967 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000968 else if (c == '\t')
969 *p++ = '\\', *p++ = 't';
970 else if (c == '\n')
971 *p++ = '\\', *p++ = 'n';
972 else if (c == '\r')
973 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000974 else if (c < ' ' || c >= 0x7f) {
975 /* For performance, we don't want to call
976 PyOS_snprintf here (extra layers of
977 function call). */
978 sprintf(p, "\\x%02x", c & 0xff);
979 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000980 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000981 else
982 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000984 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000985 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000988 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000989 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000991}
992
Guido van Rossum189f1df2001-05-01 16:51:53 +0000993static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000994string_repr(PyObject *op)
995{
996 return PyString_Repr(op, 1);
997}
998
999static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +00001000string_str(PyObject *s)
1001{
Tim Petersc9933152001-10-16 20:18:24 +00001002 assert(PyString_Check(s));
1003 if (PyString_CheckExact(s)) {
1004 Py_INCREF(s);
1005 return s;
1006 }
1007 else {
1008 /* Subtype -- return genuine string with the same value. */
1009 PyStringObject *t = (PyStringObject *) s;
1010 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
1011 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001012}
1013
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001015string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001016{
1017 return a->ob_size;
1018}
1019
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001021string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001022{
Andrew Dalke598710c2006-05-25 18:18:39 +00001023 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024 register PyStringObject *op;
1025 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001026#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001027 if (PyUnicode_Check(bb))
1028 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001029#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001030 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +00001031 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +00001032 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001033 return NULL;
1034 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +00001037 if ((a->ob_size == 0 || b->ob_size == 0) &&
1038 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1039 if (a->ob_size == 0) {
1040 Py_INCREF(bb);
1041 return bb;
1042 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043 Py_INCREF(a);
1044 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045 }
1046 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +00001047 if (size < 0) {
1048 PyErr_SetString(PyExc_OverflowError,
1049 "strings are too large to concat");
1050 return NULL;
1051 }
1052
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001053 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001054 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001055 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001056 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001057 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001058 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001059 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001060 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1061 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001062 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001063 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001064#undef b
1065}
1066
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001067static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001068string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001070 register Py_ssize_t i;
1071 register Py_ssize_t j;
1072 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001073 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001074 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001075 if (n < 0)
1076 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001077 /* watch out for overflows: the size can overflow int,
1078 * and the # of bytes needed can overflow size_t
1079 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001080 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001081 if (n && size / n != a->ob_size) {
1082 PyErr_SetString(PyExc_OverflowError,
1083 "repeated string is too long");
1084 return NULL;
1085 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001086 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001087 Py_INCREF(a);
1088 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001089 }
Tim Peterse7c05322004-06-27 17:24:49 +00001090 nbytes = (size_t)size;
1091 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001092 PyErr_SetString(PyExc_OverflowError,
1093 "repeated string is too long");
1094 return NULL;
1095 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001096 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001097 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001098 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001100 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001101 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001102 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001103 op->ob_sval[size] = '\0';
1104 if (a->ob_size == 1 && n > 0) {
1105 memset(op->ob_sval, a->ob_sval[0] , n);
1106 return (PyObject *) op;
1107 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001108 i = 0;
1109 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001110 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1111 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001112 }
1113 while (i < size) {
1114 j = (i <= size-i) ? i : size-i;
1115 memcpy(op->ob_sval+i, op->ob_sval, j);
1116 i += j;
1117 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001118 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119}
1120
1121/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1122
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001123static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001124string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001125 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001126 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001127{
1128 if (i < 0)
1129 i = 0;
1130 if (j < 0)
1131 j = 0; /* Avoid signed/unsigned bug in next line */
1132 if (j > a->ob_size)
1133 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001134 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1135 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001136 Py_INCREF(a);
1137 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001138 }
1139 if (j < i)
1140 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001141 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001142}
1143
Guido van Rossum9284a572000-03-07 15:53:43 +00001144static int
Fred Drakeba096332000-07-09 07:04:36 +00001145string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001146{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001147 char *s = PyString_AS_STRING(a);
1148 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001149 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001150#ifdef USE_FAST
1151 Py_ssize_t pos;
1152#else
1153 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001154 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001155 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001156#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001157
1158 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001159#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001160 if (PyUnicode_Check(el))
1161 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001162#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001163 if (!PyString_Check(el)) {
1164 PyErr_SetString(PyExc_TypeError,
1165 "'in <string>' requires string as left operand");
1166 return -1;
1167 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001168 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001169
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001170 if (len_sub == 0)
1171 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001172
1173#ifdef USE_FAST
1174 pos = fastsearch(
1175 s, PyString_GET_SIZE(a),
1176 sub, len_sub, FAST_SEARCH
1177 );
1178 return (pos != -1);
1179#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001180 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001181 substring. When s<last, there is still room for a possible match
1182 and s[0] through s[len_sub-1] will be in bounds.
1183 shortsub is len_sub minus the last character which is checked
1184 separately just before the memcmp(). That check helps prevent
1185 false starts and saves the setup time for memcmp().
1186 */
1187 firstchar = sub[0];
1188 shortsub = len_sub - 1;
1189 lastchar = sub[shortsub];
1190 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1191 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001192 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001193 if (s == NULL)
1194 return 0;
1195 assert(s < last);
1196 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001197 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001198 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001199 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001200#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001201 return 0;
1202}
1203
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001204static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001205string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001206{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001207 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001208 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001209 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001210 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001211 return NULL;
1212 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001213 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001214 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001215 if (v == NULL)
1216 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001217 else {
1218#ifdef COUNT_ALLOCS
1219 one_strings++;
1220#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001221 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001222 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001223 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001224}
1225
Martin v. Löwiscd353062001-05-24 16:56:35 +00001226static PyObject*
1227string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001228{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001229 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001230 Py_ssize_t len_a, len_b;
1231 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001232 PyObject *result;
1233
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001234 /* Make sure both arguments are strings. */
1235 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001236 result = Py_NotImplemented;
1237 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001238 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001239 if (a == b) {
1240 switch (op) {
1241 case Py_EQ:case Py_LE:case Py_GE:
1242 result = Py_True;
1243 goto out;
1244 case Py_NE:case Py_LT:case Py_GT:
1245 result = Py_False;
1246 goto out;
1247 }
1248 }
1249 if (op == Py_EQ) {
1250 /* Supporting Py_NE here as well does not save
1251 much time, since Py_NE is rarely used. */
1252 if (a->ob_size == b->ob_size
1253 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001254 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001255 a->ob_size) == 0)) {
1256 result = Py_True;
1257 } else {
1258 result = Py_False;
1259 }
1260 goto out;
1261 }
1262 len_a = a->ob_size; len_b = b->ob_size;
1263 min_len = (len_a < len_b) ? len_a : len_b;
1264 if (min_len > 0) {
1265 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1266 if (c==0)
1267 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1268 }else
1269 c = 0;
1270 if (c == 0)
1271 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1272 switch (op) {
1273 case Py_LT: c = c < 0; break;
1274 case Py_LE: c = c <= 0; break;
1275 case Py_EQ: assert(0); break; /* unreachable */
1276 case Py_NE: c = c != 0; break;
1277 case Py_GT: c = c > 0; break;
1278 case Py_GE: c = c >= 0; break;
1279 default:
1280 result = Py_NotImplemented;
1281 goto out;
1282 }
1283 result = c ? Py_True : Py_False;
1284 out:
1285 Py_INCREF(result);
1286 return result;
1287}
1288
1289int
1290_PyString_Eq(PyObject *o1, PyObject *o2)
1291{
1292 PyStringObject *a, *b;
1293 a = (PyStringObject*)o1;
1294 b = (PyStringObject*)o2;
1295 return a->ob_size == b->ob_size
1296 && *a->ob_sval == *b->ob_sval
1297 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001298}
1299
Guido van Rossum9bfef441993-03-29 10:43:31 +00001300static long
Fred Drakeba096332000-07-09 07:04:36 +00001301string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001302{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001303 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001304 register unsigned char *p;
1305 register long x;
1306
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001307 if (a->ob_shash != -1)
1308 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001309 len = a->ob_size;
1310 p = (unsigned char *) a->ob_sval;
1311 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001312 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001313 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001314 x ^= a->ob_size;
1315 if (x == -1)
1316 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001317 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001318 return x;
1319}
1320
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001321#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1322
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001323static PyObject*
1324string_subscript(PyStringObject* self, PyObject* item)
1325{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001326 PyNumberMethods *nb = item->ob_type->tp_as_number;
1327 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1328 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001329 if (i == -1 && PyErr_Occurred())
1330 return NULL;
1331 if (i < 0)
1332 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001333 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001334 }
1335 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001336 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001337 char* source_buf;
1338 char* result_buf;
1339 PyObject* result;
1340
Tim Petersae1d0c92006-03-17 03:29:34 +00001341 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001342 PyString_GET_SIZE(self),
1343 &start, &stop, &step, &slicelength) < 0) {
1344 return NULL;
1345 }
1346
1347 if (slicelength <= 0) {
1348 return PyString_FromStringAndSize("", 0);
1349 }
1350 else {
1351 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001352 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001353 if (result_buf == NULL)
1354 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001355
Tim Petersae1d0c92006-03-17 03:29:34 +00001356 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001357 cur += step, i++) {
1358 result_buf[i] = source_buf[cur];
1359 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001360
1361 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001362 slicelength);
1363 PyMem_Free(result_buf);
1364 return result;
1365 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001366 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001367 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001368 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001369 "string indices must be integers");
1370 return NULL;
1371 }
1372}
1373
Martin v. Löwis18e16552006-02-15 17:27:45 +00001374static Py_ssize_t
1375string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001376{
1377 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001378 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001379 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001380 return -1;
1381 }
1382 *ptr = (void *)self->ob_sval;
1383 return self->ob_size;
1384}
1385
Martin v. Löwis18e16552006-02-15 17:27:45 +00001386static Py_ssize_t
1387string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001388{
Guido van Rossum045e6881997-09-08 18:30:11 +00001389 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001390 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001391 return -1;
1392}
1393
Martin v. Löwis18e16552006-02-15 17:27:45 +00001394static Py_ssize_t
1395string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001396{
1397 if ( lenp )
1398 *lenp = self->ob_size;
1399 return 1;
1400}
1401
Martin v. Löwis18e16552006-02-15 17:27:45 +00001402static Py_ssize_t
1403string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001404{
1405 if ( index != 0 ) {
1406 PyErr_SetString(PyExc_SystemError,
1407 "accessing non-existent string segment");
1408 return -1;
1409 }
1410 *ptr = self->ob_sval;
1411 return self->ob_size;
1412}
1413
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001414static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001415 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001416 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001417 (ssizeargfunc)string_repeat, /*sq_repeat*/
1418 (ssizeargfunc)string_item, /*sq_item*/
1419 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001420 0, /*sq_ass_item*/
1421 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001422 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001423};
1424
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001425static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001426 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001427 (binaryfunc)string_subscript,
1428 0,
1429};
1430
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001431static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001432 (readbufferproc)string_buffer_getreadbuf,
1433 (writebufferproc)string_buffer_getwritebuf,
1434 (segcountproc)string_buffer_getsegcount,
1435 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001436};
1437
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438
1439
/* Strip variants; each value is an index into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for variant i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001448
/* Append the substring data[left:right] to `list` as a new string.
   Expands to a sequence of statements and relies on the caller
   providing the locals `str` and `list` and an `onError` label, which
   is jumped to if creating or appending the substring fails.  The
   temporary reference in `str` is released in either case. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
1460
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001462split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001464 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001465 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466 PyObject *list = PyList_New(0);
1467
1468 if (list == NULL)
1469 return NULL;
1470
Guido van Rossum4c08d552000-03-10 22:55:18 +00001471 for (i = j = 0; i < len; ) {
1472 while (i < len && isspace(Py_CHARMASK(s[i])))
1473 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001475 while (i < len && !isspace(Py_CHARMASK(s[i])))
1476 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001478 if (maxsplit-- <= 0)
1479 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001480 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 while (i < len && isspace(Py_CHARMASK(s[i])))
1482 i++;
1483 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 }
1485 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001486 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001487 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001488 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001490 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 Py_DECREF(list);
1492 return NULL;
1493}
1494
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001495static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001496split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001497{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001498 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001499 PyObject *str;
1500 PyObject *list = PyList_New(0);
1501
1502 if (list == NULL)
1503 return NULL;
1504
1505 for (i = j = 0; i < len; ) {
1506 if (s[i] == ch) {
1507 if (maxcount-- <= 0)
1508 break;
1509 SPLIT_APPEND(s, j, i);
1510 i = j = i + 1;
1511 } else
1512 i++;
1513 }
1514 if (j <= len) {
1515 SPLIT_APPEND(s, j, len);
1516 }
1517 return list;
1518
1519 onError:
1520 Py_DECREF(list);
1521 return NULL;
1522}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001524PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525"S.split([sep [,maxsplit]]) -> list of strings\n\
1526\n\
1527Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001529splits are done. If sep is not specified or is None, any\n\
1530whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531
1532static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001533string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001535 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1536 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001537 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001538 const char *s = PyString_AS_STRING(self), *sub;
1539 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001540
Martin v. Löwis9c830762006-04-13 08:37:17 +00001541 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001544 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001545 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 if (PyString_Check(subobj)) {
1548 sub = PyString_AS_STRING(subobj);
1549 n = PyString_GET_SIZE(subobj);
1550 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001551#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 else if (PyUnicode_Check(subobj))
1553 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001554#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1556 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001557
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 if (n == 0) {
1559 PyErr_SetString(PyExc_ValueError, "empty separator");
1560 return NULL;
1561 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001562 else if (n == 1)
1563 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564
1565 list = PyList_New(0);
1566 if (list == NULL)
1567 return NULL;
1568
1569 i = j = 0;
1570 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001571 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 if (maxsplit-- <= 0)
1573 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001574 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 if (item == NULL)
1576 goto fail;
1577 err = PyList_Append(list, item);
1578 Py_DECREF(item);
1579 if (err < 0)
1580 goto fail;
1581 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 }
1583 else
1584 i++;
1585 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001586 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 if (item == NULL)
1588 goto fail;
1589 err = PyList_Append(list, item);
1590 Py_DECREF(item);
1591 if (err < 0)
1592 goto fail;
1593
1594 return list;
1595
1596 fail:
1597 Py_DECREF(list);
1598 return NULL;
1599}
1600
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001601PyDoc_STRVAR(partition__doc__,
1602"S.partition(sep) -> (head, sep, tail)\n\
1603\n\
1604Searches for the separator sep in S, and returns the part before it,\n\
1605the separator itself, and the part after it. If the separator is not\n\
1606found, returns S and two empty strings.");
1607
1608static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001609string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001610{
1611 Py_ssize_t len = PyString_GET_SIZE(self), sep_len, pos;
1612 const char *str = PyString_AS_STRING(self), *sep;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001613 PyObject * out;
1614
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001615 if (PyString_Check(sep_obj)) {
1616 sep = PyString_AS_STRING(sep_obj);
1617 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001618 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001619#ifdef Py_USING_UNICODE
1620 else if (PyUnicode_Check(sep_obj))
1621 return PyUnicode_Partition((PyObject *)self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001622#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001623 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001624 return NULL;
1625
1626 if (sep_len == 0) {
1627 PyErr_SetString(PyExc_ValueError, "empty separator");
1628 return NULL;
1629 }
1630
1631 out = PyTuple_New(3);
1632 if (!out)
1633 return NULL;
1634
1635 pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH);
1636 if (pos < 0) {
1637 Py_INCREF(self);
1638 PyTuple_SET_ITEM(out, 0, (PyObject*) self);
1639 Py_INCREF(nullstring);
1640 PyTuple_SET_ITEM(out, 1, (PyObject*) nullstring);
1641 Py_INCREF(nullstring);
1642 PyTuple_SET_ITEM(out, 2, (PyObject*) nullstring);
1643 } else {
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001644 PyObject* obj;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001645 PyTuple_SET_ITEM(out, 0, PyString_FromStringAndSize(str, pos));
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001646 Py_INCREF(sep_obj);
1647 PyTuple_SET_ITEM(out, 1, sep_obj);
1648 pos += sep_len;
1649 obj = PyString_FromStringAndSize(str + pos, len - pos);
1650 PyTuple_SET_ITEM(out, 2, obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001651 if (PyErr_Occurred()) {
1652 Py_DECREF(out);
1653 return NULL;
1654 }
1655 }
1656
1657 return out;
1658}
1659
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001660static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001661rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001662{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001663 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001664 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001665 PyObject *list = PyList_New(0);
1666
1667 if (list == NULL)
1668 return NULL;
1669
1670 for (i = j = len - 1; i >= 0; ) {
1671 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1672 i--;
1673 j = i;
1674 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1675 i--;
1676 if (j > i) {
1677 if (maxsplit-- <= 0)
1678 break;
Fredrik Lundh554da412006-05-25 19:19:05 +00001679 SPLIT_APPEND(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001680 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1681 i--;
1682 j = i;
1683 }
1684 }
1685 if (j >= 0) {
Fredrik Lundh554da412006-05-25 19:19:05 +00001686 SPLIT_APPEND(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001687 }
Fredrik Lundh554da412006-05-25 19:19:05 +00001688 if (PyList_Reverse(list) < 0)
1689 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001690 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001691 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001692 Py_DECREF(list);
1693 return NULL;
1694}
1695
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001696static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001697rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001698{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001699 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001700 PyObject *str;
1701 PyObject *list = PyList_New(0);
1702
1703 if (list == NULL)
1704 return NULL;
1705
1706 for (i = j = len - 1; i >= 0; ) {
1707 if (s[i] == ch) {
1708 if (maxcount-- <= 0)
1709 break;
Fredrik Lundh554da412006-05-25 19:19:05 +00001710 SPLIT_APPEND(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001711 j = i = i - 1;
1712 } else
1713 i--;
1714 }
1715 if (j >= -1) {
Fredrik Lundh554da412006-05-25 19:19:05 +00001716 SPLIT_APPEND(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001717 }
Fredrik Lundh554da412006-05-25 19:19:05 +00001718 if (PyList_Reverse(list) < 0)
1719 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001720 return list;
1721
1722 onError:
1723 Py_DECREF(list);
1724 return NULL;
1725}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001726
1727PyDoc_STRVAR(rsplit__doc__,
1728"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1729\n\
1730Return a list of the words in the string S, using sep as the\n\
1731delimiter string, starting at the end of the string and working\n\
1732to the front. If maxsplit is given, at most maxsplit splits are\n\
1733done. If sep is not specified or is None, any whitespace string\n\
1734is a separator.");
1735
1736static PyObject *
1737string_rsplit(PyStringObject *self, PyObject *args)
1738{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001739 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1740 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001741 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001742 const char *s = PyString_AS_STRING(self), *sub;
1743 PyObject *list, *item, *subobj = Py_None;
1744
Martin v. Löwis9c830762006-04-13 08:37:17 +00001745 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001746 return NULL;
1747 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001748 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001749 if (subobj == Py_None)
1750 return rsplit_whitespace(s, len, maxsplit);
1751 if (PyString_Check(subobj)) {
1752 sub = PyString_AS_STRING(subobj);
1753 n = PyString_GET_SIZE(subobj);
1754 }
1755#ifdef Py_USING_UNICODE
1756 else if (PyUnicode_Check(subobj))
1757 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1758#endif
1759 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1760 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001761
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001762 if (n == 0) {
1763 PyErr_SetString(PyExc_ValueError, "empty separator");
1764 return NULL;
1765 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001766 else if (n == 1)
1767 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001768
1769 list = PyList_New(0);
1770 if (list == NULL)
1771 return NULL;
1772
1773 j = len;
1774 i = j - n;
1775 while (i >= 0) {
1776 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1777 if (maxsplit-- <= 0)
1778 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001779 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001780 if (item == NULL)
1781 goto fail;
1782 err = PyList_Insert(list, 0, item);
1783 Py_DECREF(item);
1784 if (err < 0)
1785 goto fail;
1786 j = i;
1787 i -= n;
1788 }
1789 else
1790 i--;
1791 }
1792 item = PyString_FromStringAndSize(s, j);
1793 if (item == NULL)
1794 goto fail;
1795 err = PyList_Insert(list, 0, item);
1796 Py_DECREF(item);
1797 if (err < 0)
1798 goto fail;
1799
1800 return list;
1801
1802 fail:
1803 Py_DECREF(list);
1804 return NULL;
1805}
1806
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001808PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809"S.join(sequence) -> string\n\
1810\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001811Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001812sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813
1814static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001815string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816{
1817 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001818 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001821 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001823 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001824 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825
Tim Peters19fe14e2001-01-19 03:03:47 +00001826 seq = PySequence_Fast(orig, "");
1827 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001828 return NULL;
1829 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001830
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001831 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001832 if (seqlen == 0) {
1833 Py_DECREF(seq);
1834 return PyString_FromString("");
1835 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001837 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001838 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1839 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001840 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001841 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001842 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001844
Raymond Hettinger674f2412004-08-23 23:23:54 +00001845 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001846 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001847 * Do a pre-pass to figure out the total amount of space we'll
1848 * need (sz), see whether any argument is absurd, and defer to
1849 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001850 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001851 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001852 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001853 item = PySequence_Fast_GET_ITEM(seq, i);
1854 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001855#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001856 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001857 /* Defer to Unicode join.
1858 * CAUTION: There's no gurantee that the
1859 * original sequence can be iterated over
1860 * again, so we must pass seq here.
1861 */
1862 PyObject *result;
1863 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001864 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001865 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001866 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001867#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001868 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001869 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001870 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001871 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001872 Py_DECREF(seq);
1873 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001874 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001875 sz += PyString_GET_SIZE(item);
1876 if (i != 0)
1877 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001878 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001879 PyErr_SetString(PyExc_OverflowError,
1880 "join() is too long for a Python string");
1881 Py_DECREF(seq);
1882 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001884 }
1885
1886 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001887 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001888 if (res == NULL) {
1889 Py_DECREF(seq);
1890 return NULL;
1891 }
1892
1893 /* Catenate everything. */
1894 p = PyString_AS_STRING(res);
1895 for (i = 0; i < seqlen; ++i) {
1896 size_t n;
1897 item = PySequence_Fast_GET_ITEM(seq, i);
1898 n = PyString_GET_SIZE(item);
1899 memcpy(p, PyString_AS_STRING(item), n);
1900 p += n;
1901 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001902 memcpy(p, sep, seplen);
1903 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001904 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001906
Jeremy Hylton49048292000-07-11 03:28:17 +00001907 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909}
1910
Tim Peters52e155e2001-06-16 05:42:57 +00001911PyObject *
1912_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001913{
Tim Petersa7259592001-06-16 05:11:17 +00001914 assert(sep != NULL && PyString_Check(sep));
1915 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001916 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001917}
1918
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001919static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001920string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001921{
1922 if (*end > len)
1923 *end = len;
1924 else if (*end < 0)
1925 *end += len;
1926 if (*end < 0)
1927 *end = 0;
1928 if (*start < 0)
1929 *start += len;
1930 if (*start < 0)
1931 *start = 0;
1932}
1933
Martin v. Löwis18e16552006-02-15 17:27:45 +00001934static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001935string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001937 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001938 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001939 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001940 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941
Martin v. Löwis18e16552006-02-15 17:27:45 +00001942 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001943 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001944 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001945 return -2;
1946 if (PyString_Check(subobj)) {
1947 sub = PyString_AS_STRING(subobj);
1948 n = PyString_GET_SIZE(subobj);
1949 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001950#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001951 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001952 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001953#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001954 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 return -2;
1956
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001957 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001959#ifdef USE_FAST
1960 if (n == 0)
1961 return (dir > 0) ? i : last;
1962 if (dir > 0) {
1963 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1964 FAST_SEARCH);
1965 if (pos < 0)
1966 return pos;
1967 return pos + i;
1968 }
1969#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001970 if (dir > 0) {
1971 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001973 last -= n;
1974 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001975 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001976 return (long)i;
1977 }
1978 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001979 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001980
Guido van Rossum4c08d552000-03-10 22:55:18 +00001981 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001982 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001983 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001984 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001985 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001986 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001987
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988 return -1;
1989}
1990
1991
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001992PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993"S.find(sub [,start [,end]]) -> int\n\
1994\n\
1995Return the lowest index in S where substring sub is found,\n\
1996such that sub is contained within s[start,end]. Optional\n\
1997arguments start and end are interpreted as in slice notation.\n\
1998\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001999Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000
2001static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002002string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002004 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005 if (result == -2)
2006 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008}
2009
2010
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002011PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012"S.index(sub [,start [,end]]) -> int\n\
2013\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002014Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015
2016static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002017string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002019 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020 if (result == -2)
2021 return NULL;
2022 if (result == -1) {
2023 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002024 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025 return NULL;
2026 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002027 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028}
2029
2030
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002031PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032"S.rfind(sub [,start [,end]]) -> int\n\
2033\n\
2034Return the highest index in S where substring sub is found,\n\
2035such that sub is contained within s[start,end]. Optional\n\
2036arguments start and end are interpreted as in slice notation.\n\
2037\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002038Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039
2040static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002041string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002043 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 if (result == -2)
2045 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002046 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047}
2048
2049
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002050PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051"S.rindex(sub [,start [,end]]) -> int\n\
2052\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002053Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054
2055static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002056string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002058 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002059 if (result == -2)
2060 return NULL;
2061 if (result == -1) {
2062 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002063 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
2065 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002066 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067}
2068
2069
2070static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002071do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2072{
2073 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002074 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002075 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002076 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2077 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002078
2079 i = 0;
2080 if (striptype != RIGHTSTRIP) {
2081 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2082 i++;
2083 }
2084 }
2085
2086 j = len;
2087 if (striptype != LEFTSTRIP) {
2088 do {
2089 j--;
2090 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2091 j++;
2092 }
2093
2094 if (i == 0 && j == len && PyString_CheckExact(self)) {
2095 Py_INCREF(self);
2096 return (PyObject*)self;
2097 }
2098 else
2099 return PyString_FromStringAndSize(s+i, j-i);
2100}
2101
2102
2103static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002104do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105{
2106 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002107 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109 i = 0;
2110 if (striptype != RIGHTSTRIP) {
2111 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2112 i++;
2113 }
2114 }
2115
2116 j = len;
2117 if (striptype != LEFTSTRIP) {
2118 do {
2119 j--;
2120 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2121 j++;
2122 }
2123
Tim Peters8fa5dd02001-09-12 02:18:30 +00002124 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125 Py_INCREF(self);
2126 return (PyObject*)self;
2127 }
2128 else
2129 return PyString_FromStringAndSize(s+i, j-i);
2130}
2131
2132
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002133static PyObject *
2134do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2135{
2136 PyObject *sep = NULL;
2137
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002138 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002139 return NULL;
2140
2141 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002142 if (PyString_Check(sep))
2143 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002144#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002145 else if (PyUnicode_Check(sep)) {
2146 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2147 PyObject *res;
2148 if (uniself==NULL)
2149 return NULL;
2150 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2151 striptype, sep);
2152 Py_DECREF(uniself);
2153 return res;
2154 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002155#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002156 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002157#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002158 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002159#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002160 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002161#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002162 STRIPNAME(striptype));
2163 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002164 }
2165
2166 return do_strip(self, striptype);
2167}
2168
2169
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002170PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002171"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172\n\
2173Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002174whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002175If chars is given and not None, remove characters in chars instead.\n\
2176If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177
2178static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002179string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002181 if (PyTuple_GET_SIZE(args) == 0)
2182 return do_strip(self, BOTHSTRIP); /* Common case */
2183 else
2184 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185}
2186
2187
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002188PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002189"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002191Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002192If chars is given and not None, remove characters in chars instead.\n\
2193If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194
2195static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002196string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002198 if (PyTuple_GET_SIZE(args) == 0)
2199 return do_strip(self, LEFTSTRIP); /* Common case */
2200 else
2201 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202}
2203
2204
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002205PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002206"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002208Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002209If chars is given and not None, remove characters in chars instead.\n\
2210If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211
2212static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002213string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002215 if (PyTuple_GET_SIZE(args) == 0)
2216 return do_strip(self, RIGHTSTRIP); /* Common case */
2217 else
2218 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219}
2220
2221
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002222PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223"S.lower() -> string\n\
2224\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002225Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002227/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2228#ifndef _tolower
2229#define _tolower tolower
2230#endif
2231
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002233string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002235 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002236 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002237 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002239 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002240 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002242
2243 s = PyString_AS_STRING(newobj);
2244
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002245 memcpy(s, PyString_AS_STRING(self), n);
2246
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002248 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002249 if (isupper(c))
2250 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002252
Anthony Baxtera6286212006-04-11 07:42:36 +00002253 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254}
2255
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002256PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257"S.upper() -> string\n\
2258\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002259Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002261#ifndef _toupper
2262#define _toupper toupper
2263#endif
2264
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002266string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002268 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002269 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002270 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002272 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002273 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002275
2276 s = PyString_AS_STRING(newobj);
2277
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002278 memcpy(s, PyString_AS_STRING(self), n);
2279
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002280 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002281 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002282 if (islower(c))
2283 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002284 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002285
Anthony Baxtera6286212006-04-11 07:42:36 +00002286 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287}
2288
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002289PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002290"S.title() -> string\n\
2291\n\
2292Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002293characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294
2295static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002296string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297{
2298 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002299 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002301 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302
Anthony Baxtera6286212006-04-11 07:42:36 +00002303 newobj = PyString_FromStringAndSize(NULL, n);
2304 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002306 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 for (i = 0; i < n; i++) {
2308 int c = Py_CHARMASK(*s++);
2309 if (islower(c)) {
2310 if (!previous_is_cased)
2311 c = toupper(c);
2312 previous_is_cased = 1;
2313 } else if (isupper(c)) {
2314 if (previous_is_cased)
2315 c = tolower(c);
2316 previous_is_cased = 1;
2317 } else
2318 previous_is_cased = 0;
2319 *s_new++ = c;
2320 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002321 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322}
2323
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002324PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325"S.capitalize() -> string\n\
2326\n\
2327Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002328capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329
2330static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002331string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332{
2333 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002334 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002335 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336
Anthony Baxtera6286212006-04-11 07:42:36 +00002337 newobj = PyString_FromStringAndSize(NULL, n);
2338 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002340 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002341 if (0 < n) {
2342 int c = Py_CHARMASK(*s++);
2343 if (islower(c))
2344 *s_new = toupper(c);
2345 else
2346 *s_new = c;
2347 s_new++;
2348 }
2349 for (i = 1; i < n; i++) {
2350 int c = Py_CHARMASK(*s++);
2351 if (isupper(c))
2352 *s_new = tolower(c);
2353 else
2354 *s_new = c;
2355 s_new++;
2356 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002357 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358}
2359
2360
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002361PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362"S.count(sub[, start[, end]]) -> int\n\
2363\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002364Return the number of non-overlapping occurrences of substring sub in\n\
2365string S[start:end]. Optional arguments start and end are interpreted\n\
2366as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367
2368static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002369string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002371 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002372 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002373 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002374 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002375 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376
Guido van Rossumc6821402000-05-08 14:08:05 +00002377 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2378 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002380
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381 if (PyString_Check(subobj)) {
2382 sub = PyString_AS_STRING(subobj);
2383 n = PyString_GET_SIZE(subobj);
2384 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002385#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002386 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002387 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002388 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2389 if (count == -1)
2390 return NULL;
2391 else
2392 return PyInt_FromLong((long) count);
2393 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002394#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002395 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2396 return NULL;
2397
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002398 string_adjust_indices(&i, &last, len);
2399
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400 m = last + 1 - n;
2401 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002402 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403
Fredrik Lundhaf722372006-05-25 17:55:31 +00002404#ifdef USE_FAST
2405 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2406 if (r < 0)
2407 r = 0; /* no match */
2408#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409 r = 0;
2410 while (i < m) {
Fredrik Lundhaf722372006-05-25 17:55:31 +00002411 const char *t
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412 if (!memcmp(s+i, sub, n)) {
2413 r++;
2414 i += n;
2415 } else {
2416 i++;
2417 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002418 if (i >= m)
2419 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002420 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002421 if (t == NULL)
2422 break;
2423 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002425#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002426 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427}
2428
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002429PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430"S.swapcase() -> string\n\
2431\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002433converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434
2435static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002436string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437{
2438 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002439 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002440 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441
Anthony Baxtera6286212006-04-11 07:42:36 +00002442 newobj = PyString_FromStringAndSize(NULL, n);
2443 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002445 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 for (i = 0; i < n; i++) {
2447 int c = Py_CHARMASK(*s++);
2448 if (islower(c)) {
2449 *s_new = toupper(c);
2450 }
2451 else if (isupper(c)) {
2452 *s_new = tolower(c);
2453 }
2454 else
2455 *s_new = c;
2456 s_new++;
2457 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002458 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002459}
2460
2461
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002462PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463"S.translate(table [,deletechars]) -> string\n\
2464\n\
2465Return a copy of the string S, where all characters occurring\n\
2466in the optional argument deletechars are removed, and the\n\
2467remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002468translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469
2470static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002471string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002473 register char *input, *output;
2474 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002475 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002477 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002478 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002479 PyObject *result;
2480 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002481 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002483 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002486
2487 if (PyString_Check(tableobj)) {
2488 table1 = PyString_AS_STRING(tableobj);
2489 tablen = PyString_GET_SIZE(tableobj);
2490 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002491#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002492 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002493 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002494 parameter; instead a mapping to None will cause characters
2495 to be deleted. */
2496 if (delobj != NULL) {
2497 PyErr_SetString(PyExc_TypeError,
2498 "deletions are implemented differently for unicode");
2499 return NULL;
2500 }
2501 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2502 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002503#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002506
Martin v. Löwis00b61272002-12-12 20:03:19 +00002507 if (tablen != 256) {
2508 PyErr_SetString(PyExc_ValueError,
2509 "translation table must be 256 characters long");
2510 return NULL;
2511 }
2512
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 if (delobj != NULL) {
2514 if (PyString_Check(delobj)) {
2515 del_table = PyString_AS_STRING(delobj);
2516 dellen = PyString_GET_SIZE(delobj);
2517 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002518#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002519 else if (PyUnicode_Check(delobj)) {
2520 PyErr_SetString(PyExc_TypeError,
2521 "deletions are implemented differently for unicode");
2522 return NULL;
2523 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002524#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2526 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527 }
2528 else {
2529 del_table = NULL;
2530 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002531 }
2532
2533 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002534 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535 result = PyString_FromStringAndSize((char *)NULL, inlen);
2536 if (result == NULL)
2537 return NULL;
2538 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002539 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002540
2541 if (dellen == 0) {
2542 /* If no deletions are required, use faster code */
2543 for (i = inlen; --i >= 0; ) {
2544 c = Py_CHARMASK(*input++);
2545 if (Py_CHARMASK((*output++ = table[c])) != c)
2546 changed = 1;
2547 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002548 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002549 return result;
2550 Py_DECREF(result);
2551 Py_INCREF(input_obj);
2552 return input_obj;
2553 }
2554
2555 for (i = 0; i < 256; i++)
2556 trans_table[i] = Py_CHARMASK(table[i]);
2557
2558 for (i = 0; i < dellen; i++)
2559 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2560
2561 for (i = inlen; --i >= 0; ) {
2562 c = Py_CHARMASK(*input++);
2563 if (trans_table[c] != -1)
2564 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2565 continue;
2566 changed = 1;
2567 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002568 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002569 Py_DECREF(result);
2570 Py_INCREF(input_obj);
2571 return input_obj;
2572 }
2573 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002574 if (inlen > 0)
2575 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576 return result;
2577}
2578
2579
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002580#define FORWARD 1
2581#define REVERSE -1
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002582
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002583/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002584
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002585/* Don't call if length < 2 */
2586#define Py_STRING_MATCH(target, offset, pattern, length) \
2587 (target[offset] == pattern[0] && \
2588 target[offset+length-1] == pattern[length-1] && \
2589 !memcmp(target+offset+1, pattern+1, length-2) )
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002590
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591#define findchar(target, target_len, c) \
2592 ((char *)memchr((const void *)(target), c, target_len))
2593
2594/* String ops must return a string. */
2595/* If the object is subclass of string, create a copy */
2596static PyStringObject *
2597return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002598{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002599 if (PyString_CheckExact(self)) {
2600 Py_INCREF(self);
2601 return self;
2602 }
2603 return (PyStringObject *)PyString_FromStringAndSize(
2604 PyString_AS_STRING(self),
2605 PyString_GET_SIZE(self));
2606}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002607
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002608static Py_ssize_t
2609countchar(char *target, int target_len, char c)
2610{
2611 Py_ssize_t count=0;
2612 char *start=target;
2613 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002615 while ( (start=findchar(start, end-start, c)) != NULL ) {
2616 count++;
2617 start += 1;
2618 }
2619
2620 return count;
2621}
2622
2623static Py_ssize_t
2624findstring(char *target, Py_ssize_t target_len,
2625 char *pattern, Py_ssize_t pattern_len,
2626 Py_ssize_t start,
2627 Py_ssize_t end,
2628 int direction)
2629{
2630 if (start < 0) {
2631 start += target_len;
2632 if (start < 0)
2633 start = 0;
2634 }
2635 if (end > target_len) {
2636 end = target_len;
2637 } else if (end < 0) {
2638 end += target_len;
2639 if (end < 0)
2640 end = 0;
2641 }
2642
2643 /* zero-length substrings always match at the first attempt */
2644 if (pattern_len == 0)
2645 return (direction > 0) ? start : end;
2646
2647 end -= pattern_len;
2648
2649 if (direction < 0) {
2650 for (; end >= start; end--)
2651 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2652 return end;
2653 } else {
2654 for (; start <= end; start++)
2655 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2656 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002657 }
2658 return -1;
2659}
2660
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002661Py_ssize_t
2662countstring(char *target, Py_ssize_t target_len,
2663 char *pattern, Py_ssize_t pattern_len,
2664 Py_ssize_t start,
2665 Py_ssize_t end,
2666 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002667{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002668 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002669
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002670 if (start < 0) {
2671 start += target_len;
2672 if (start < 0)
2673 start = 0;
2674 }
2675 if (end > target_len) {
2676 end = target_len;
2677 } else if (end < 0) {
2678 end += target_len;
2679 if (end < 0)
2680 end = 0;
2681 }
2682
2683 /* zero-length substrings match everywhere */
2684 if (pattern_len == 0)
2685 return target_len+1;
2686
2687 end -= pattern_len;
2688
2689 if (direction < 0) {
2690 for (; end >= start; end--)
2691 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2692 count++;
2693 end -= pattern_len-1;
2694 }
2695 } else {
2696 for (; start <= end; start++)
2697 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2698 count++;
2699 start += pattern_len-1;
2700 }
2701 }
2702 return count;
2703}
2704
2705
/* Algorithms for different cases of string replacement */
2707
2708/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2709static PyStringObject *
2710replace_interleave(PyStringObject *self,
2711 PyStringObject *to,
2712 Py_ssize_t maxcount)
2713{
2714 char *self_s, *to_s, *result_s;
2715 Py_ssize_t self_len, to_len, result_len;
2716 Py_ssize_t count, i, product;
2717 PyStringObject *result;
2718
2719 self_len = PyString_GET_SIZE(self);
2720 to_len = PyString_GET_SIZE(to);
2721
2722 /* 1 at the end plus 1 after every character */
2723 count = self_len+1;
2724 if (maxcount < count)
2725 count = maxcount;
2726
2727 /* Check for overflow */
2728 /* result_len = count * to_len + self_len; */
2729 product = count * to_len;
2730 if (product / to_len != count) {
2731 PyErr_SetString(PyExc_OverflowError,
2732 "replace string is too long");
2733 return NULL;
2734 }
2735 result_len = product + self_len;
2736 if (result_len < 0) {
2737 PyErr_SetString(PyExc_OverflowError,
2738 "replace string is too long");
2739 return NULL;
2740 }
2741
2742 if (! (result = (PyStringObject *)
2743 PyString_FromStringAndSize(NULL, result_len)) )
2744 return NULL;
2745
2746 self_s = PyString_AS_STRING(self);
2747 to_s = PyString_AS_STRING(to);
2748 to_len = PyString_GET_SIZE(to);
2749 result_s = PyString_AS_STRING(result);
2750
2751 /* TODO: special case single character, which doesn't need memcpy */
2752
2753 /* Lay the first one down (guaranteed this will occur) */
2754 memcpy(result_s, to_s, to_len);
2755 result_s += to_len;
2756 count -= 1;
2757
2758 for (i=0; i<count; i++) {
2759 *result_s++ = *self_s++;
2760 memcpy(result_s, to_s, to_len);
2761 result_s += to_len;
2762 }
2763
2764 /* Copy the rest of the original string */
2765 memcpy(result_s, self_s, self_len-i);
2766
2767 return result;
2768}
2769
2770/* Special case for deleting a single character */
2771/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2772static PyStringObject *
2773replace_delete_single_character(PyStringObject *self,
2774 char from_c, Py_ssize_t maxcount)
2775{
2776 char *self_s, *result_s;
2777 char *start, *next, *end;
2778 Py_ssize_t self_len, result_len;
2779 Py_ssize_t count;
2780 PyStringObject *result;
2781
2782 self_len = PyString_GET_SIZE(self);
2783 self_s = PyString_AS_STRING(self);
2784
2785 count = countchar(self_s, self_len, from_c);
2786 if (count == 0) {
2787 return return_self(self);
2788 }
2789 if (count > maxcount)
2790 count = maxcount;
2791
2792 result_len = self_len - count; /* from_len == 1 */
2793 assert(result_len>=0);
2794
2795 if ( (result = (PyStringObject *)
2796 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2797 return NULL;
2798 result_s = PyString_AS_STRING(result);
2799
2800 start = self_s;
2801 end = self_s + self_len;
2802 while (count-- > 0) {
2803 next = findchar(start, end-start, from_c);
2804 if (next == NULL)
2805 break;
2806 memcpy(result_s, start, next-start);
2807 result_s += (next-start);
2808 start = next+1;
2809 }
2810 memcpy(result_s, start, end-start);
2811
2812 return result;
2813}
2814
2815/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2816
2817static PyStringObject *
2818replace_delete_substring(PyStringObject *self, PyStringObject *from,
2819 Py_ssize_t maxcount) {
2820 char *self_s, *from_s, *result_s;
2821 char *start, *next, *end;
2822 Py_ssize_t self_len, from_len, result_len;
2823 Py_ssize_t count, offset;
2824 PyStringObject *result;
2825
2826 self_len = PyString_GET_SIZE(self);
2827 self_s = PyString_AS_STRING(self);
2828 from_len = PyString_GET_SIZE(from);
2829 from_s = PyString_AS_STRING(from);
2830
2831 count = countstring(self_s, self_len,
2832 from_s, from_len,
2833 0, self_len, 1);
2834
2835 if (count > maxcount)
2836 count = maxcount;
2837
2838 if (count == 0) {
2839 /* no matches */
2840 return return_self(self);
2841 }
2842
2843 result_len = self_len - (count * from_len);
2844 assert (result_len>=0);
2845
2846 if ( (result = (PyStringObject *)
2847 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2848 return NULL;
2849
2850 result_s = PyString_AS_STRING(result);
2851
2852 start = self_s;
2853 end = self_s + self_len;
2854 while (count-- > 0) {
2855 offset = findstring(start, end-start,
2856 from_s, from_len,
2857 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002858 if (offset == -1)
2859 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002860 next = start + offset;
2861
2862 memcpy(result_s, start, next-start);
2863
2864 result_s += (next-start);
2865 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002866 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 memcpy(result_s, start, end-start);
2868 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002869}
2870
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002871/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2872static PyStringObject *
2873replace_single_character_in_place(PyStringObject *self,
2874 char from_c, char to_c,
2875 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002876{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002877 char *self_s, *result_s, *start, *end, *next;
2878 Py_ssize_t self_len;
2879 PyStringObject *result;
2880
2881 /* The result string will be the same size */
2882 self_s = PyString_AS_STRING(self);
2883 self_len = PyString_GET_SIZE(self);
2884
2885 next = findchar(self_s, self_len, from_c);
2886
2887 if (next == NULL) {
2888 /* No matches; return the original string */
2889 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002890 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002891
2892 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002893 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894 if (result == NULL)
2895 return NULL;
2896 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002897 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898
2899 /* change everything in-place, starting with this one */
2900 start = result_s + (next-self_s);
2901 *start = to_c;
2902 start++;
2903 end = result_s + self_len;
2904
2905 while (--maxcount > 0) {
2906 next = findchar(start, end-start, from_c);
2907 if (next == NULL)
2908 break;
2909 *next = to_c;
2910 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002911 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912
2913 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002914}
2915
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2917static PyStringObject *
2918replace_substring_in_place(PyStringObject *self,
2919 PyStringObject *from,
2920 PyStringObject *to,
2921 Py_ssize_t maxcount)
2922{
2923 char *result_s, *start, *end;
2924 char *self_s, *from_s, *to_s;
2925 Py_ssize_t self_len, from_len, offset;
2926 PyStringObject *result;
2927
2928 /* The result string will be the same size */
2929
2930 self_s = PyString_AS_STRING(self);
2931 self_len = PyString_GET_SIZE(self);
2932
2933 from_s = PyString_AS_STRING(from);
2934 from_len = PyString_GET_SIZE(from);
2935 to_s = PyString_AS_STRING(to);
2936
2937 offset = findstring(self_s, self_len,
2938 from_s, from_len,
2939 0, self_len, FORWARD);
2940
2941 if (offset == -1) {
2942 /* No matches; return the original string */
2943 return return_self(self);
2944 }
2945
2946 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002947 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002948 if (result == NULL)
2949 return NULL;
2950 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002951 memcpy(result_s, self_s, self_len);
2952
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002953
2954 /* change everything in-place, starting with this one */
2955 start = result_s + offset;
2956 memcpy(start, to_s, from_len);
2957 start += from_len;
2958 end = result_s + self_len;
2959
2960 while ( --maxcount > 0) {
2961 offset = findstring(start, end-start,
2962 from_s, from_len,
2963 0, end-start, FORWARD);
2964 if (offset==-1)
2965 break;
2966 memcpy(start+offset, to_s, from_len);
2967 start += offset+from_len;
2968 }
2969
2970 return result;
2971}
2972
2973/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2974static PyStringObject *
2975replace_single_character(PyStringObject *self,
2976 char from_c,
2977 PyStringObject *to,
2978 Py_ssize_t maxcount)
2979{
2980 char *self_s, *to_s, *result_s;
2981 char *start, *next, *end;
2982 Py_ssize_t self_len, to_len, result_len;
2983 Py_ssize_t count, product;
2984 PyStringObject *result;
2985
2986 self_s = PyString_AS_STRING(self);
2987 self_len = PyString_GET_SIZE(self);
2988
2989 count = countchar(self_s, self_len, from_c);
2990 if (count > maxcount)
2991 count = maxcount;
2992
2993 if (count == 0) {
2994 /* no matches, return unchanged */
2995 return return_self(self);
2996 }
2997
2998 to_s = PyString_AS_STRING(to);
2999 to_len = PyString_GET_SIZE(to);
3000
3001 /* use the difference between current and new, hence the "-1" */
3002 /* result_len = self_len + count * (to_len-1) */
3003 product = count * (to_len-1);
3004 if (product / (to_len-1) != count) {
3005 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3006 return NULL;
3007 }
3008 result_len = self_len + product;
3009 if (result_len < 0) {
3010 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3011 return NULL;
3012 }
3013
3014 if ( (result = (PyStringObject *)
3015 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3016 return NULL;
3017 result_s = PyString_AS_STRING(result);
3018
3019 start = self_s;
3020 end = self_s + self_len;
3021 while (count-- > 0) {
3022 next = findchar(start, end-start, from_c);
3023 if (next == NULL)
3024 break;
3025
3026 if (next == start) {
3027 /* replace with the 'to' */
3028 memcpy(result_s, to_s, to_len);
3029 result_s += to_len;
3030 start += 1;
3031 } else {
3032 /* copy the unchanged old then the 'to' */
3033 memcpy(result_s, start, next-start);
3034 result_s += (next-start);
3035 memcpy(result_s, to_s, to_len);
3036 result_s += to_len;
3037 start = next+1;
3038 }
3039 }
3040 /* Copy the remainder of the remaining string */
3041 memcpy(result_s, start, end-start);
3042
3043 return result;
3044}
3045
3046/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
3047static PyStringObject *
3048replace_substring(PyStringObject *self,
3049 PyStringObject *from,
3050 PyStringObject *to,
3051 Py_ssize_t maxcount) {
3052 char *self_s, *from_s, *to_s, *result_s;
3053 char *start, *next, *end;
3054 Py_ssize_t self_len, from_len, to_len, result_len;
3055 Py_ssize_t count, offset, product;
3056 PyStringObject *result;
3057
3058 self_s = PyString_AS_STRING(self);
3059 self_len = PyString_GET_SIZE(self);
3060 from_s = PyString_AS_STRING(from);
3061 from_len = PyString_GET_SIZE(from);
3062
3063 count = countstring(self_s, self_len,
3064 from_s, from_len,
3065 0, self_len, FORWARD);
3066 if (count > maxcount)
3067 count = maxcount;
3068
3069 if (count == 0) {
3070 /* no matches, return unchanged */
3071 return return_self(self);
3072 }
3073
3074 to_s = PyString_AS_STRING(to);
3075 to_len = PyString_GET_SIZE(to);
3076
3077 /* Check for overflow */
3078 /* result_len = self_len + count * (to_len-from_len) */
3079 product = count * (to_len-from_len);
3080 if (product / (to_len-from_len) != count) {
3081 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3082 return NULL;
3083 }
3084 result_len = self_len + product;
3085 if (result_len < 0) {
3086 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3087 return NULL;
3088 }
3089
3090 if ( (result = (PyStringObject *)
3091 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3092 return NULL;
3093 result_s = PyString_AS_STRING(result);
3094
3095 start = self_s;
3096 end = self_s + self_len;
3097 while (count-- > 0) {
3098 offset = findstring(start, end-start,
3099 from_s, from_len,
3100 0, end-start, FORWARD);
3101 if (offset == -1)
3102 break;
3103 next = start+offset;
3104 if (next == start) {
3105 /* replace with the 'to' */
3106 memcpy(result_s, to_s, to_len);
3107 result_s += to_len;
3108 start += from_len;
3109 } else {
3110 /* copy the unchanged old then the 'to' */
3111 memcpy(result_s, start, next-start);
3112 result_s += (next-start);
3113 memcpy(result_s, to_s, to_len);
3114 result_s += to_len;
3115 start = next+from_len;
3116 }
3117 }
3118 /* Copy the remainder of the remaining string */
3119 memcpy(result_s, start, end-start);
3120
3121 return result;
3122}
3123
3124
3125static PyStringObject *
3126replace(PyStringObject *self,
3127 PyStringObject *from,
3128 PyStringObject *to,
3129 Py_ssize_t maxcount)
3130{
3131 Py_ssize_t from_len, to_len;
3132
3133 if (maxcount < 0) {
3134 maxcount = PY_SSIZE_T_MAX;
3135 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3136 /* nothing to do; return the original string */
3137 return return_self(self);
3138 }
3139
3140 from_len = PyString_GET_SIZE(from);
3141 to_len = PyString_GET_SIZE(to);
3142
3143 if (maxcount == 0 ||
3144 (from_len == 0 && to_len == 0)) {
3145 /* nothing to do; return the original string */
3146 return return_self(self);
3147 }
3148
3149 /* Handle zero-length special cases */
3150
3151 if (from_len == 0) {
3152 /* insert the 'to' string everywhere. */
3153 /* >>> "Python".replace("", ".") */
3154 /* '.P.y.t.h.o.n.' */
3155 return replace_interleave(self, to, maxcount);
3156 }
3157
3158 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3159 /* point for an empty self string to generate a non-empty string */
3160 /* Special case so the remaining code always gets a non-empty string */
3161 if (PyString_GET_SIZE(self) == 0) {
3162 return return_self(self);
3163 }
3164
3165 if (to_len == 0) {
3166 /* delete all occurances of 'from' string */
3167 if (from_len == 1) {
3168 return replace_delete_single_character(
3169 self, PyString_AS_STRING(from)[0], maxcount);
3170 } else {
3171 return replace_delete_substring(self, from, maxcount);
3172 }
3173 }
3174
3175 /* Handle special case where both strings have the same length */
3176
3177 if (from_len == to_len) {
3178 if (from_len == 1) {
3179 return replace_single_character_in_place(
3180 self,
3181 PyString_AS_STRING(from)[0],
3182 PyString_AS_STRING(to)[0],
3183 maxcount);
3184 } else {
3185 return replace_substring_in_place(
3186 self, from, to, maxcount);
3187 }
3188 }
3189
3190 /* Otherwise use the more generic algorithms */
3191 if (from_len == 1) {
3192 return replace_single_character(self, PyString_AS_STRING(from)[0],
3193 to, maxcount);
3194 } else {
3195 /* len('from')>=2, len('to')>=1 */
3196 return replace_substring(self, from, to, maxcount);
3197 }
3198}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003200PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003201"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003202\n\
3203Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003204old replaced by new. If the optional argument count is\n\
3205given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003206
3207static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003208string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003209{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003210 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003211 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003212 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003213 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003214
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003215 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003217
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003218 if (PyString_Check(from)) {
3219 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003221#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003222 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003223 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003224 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003225#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003226 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003227 return NULL;
3228
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003229 if (PyString_Check(to)) {
3230 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003231 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003232#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003233 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003234 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003235 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003236#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003237 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003238 return NULL;
3239
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003240 return (PyObject *)replace((PyStringObject *) self,
3241 (PyStringObject *) from,
3242 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003243}
3244
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003245/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003246
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003247PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003248"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003249\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003250Return True if S starts with the specified prefix, False otherwise.\n\
3251With optional start, test S beginning at that position.\n\
3252With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003253
3254static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003255string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003256{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003257 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003258 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003259 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003260 Py_ssize_t plen;
3261 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003262 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003263 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003264
Guido van Rossumc6821402000-05-08 14:08:05 +00003265 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3266 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003267 return NULL;
3268 if (PyString_Check(subobj)) {
3269 prefix = PyString_AS_STRING(subobj);
3270 plen = PyString_GET_SIZE(subobj);
3271 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003272#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003273 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003274 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003275 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003276 subobj, start, end, -1);
3277 if (rc == -1)
3278 return NULL;
3279 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003280 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003281 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003282#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003283 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003284 return NULL;
3285
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003286 string_adjust_indices(&start, &end, len);
3287
3288 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003289 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003290
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003291 if (end-start >= plen)
3292 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3293 else
3294 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003295}
3296
3297
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003298PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003299"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003300\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003301Return True if S ends with the specified suffix, False otherwise.\n\
3302With optional start, test S beginning at that position.\n\
3303With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003304
3305static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003306string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003307{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003308 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003309 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003310 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003311 Py_ssize_t slen;
3312 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003313 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003314 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003315
Guido van Rossumc6821402000-05-08 14:08:05 +00003316 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3317 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003318 return NULL;
3319 if (PyString_Check(subobj)) {
3320 suffix = PyString_AS_STRING(subobj);
3321 slen = PyString_GET_SIZE(subobj);
3322 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003323#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003324 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003325 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003326 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003327 subobj, start, end, +1);
3328 if (rc == -1)
3329 return NULL;
3330 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003331 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003332 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003333#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003334 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003335 return NULL;
3336
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003337 string_adjust_indices(&start, &end, len);
3338
3339 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003340 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003341
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003342 if (end-slen > start)
3343 start = end - slen;
3344 if (end-start >= slen)
3345 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3346 else
3347 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003348}
3349
3350
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003351PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003352"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003353\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003354Encodes S using the codec registered for encoding. encoding defaults\n\
3355to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003356handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003357a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3358'xmlcharrefreplace' as well as any other name registered with\n\
3359codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003360
3361static PyObject *
3362string_encode(PyStringObject *self, PyObject *args)
3363{
3364 char *encoding = NULL;
3365 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003366 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003367
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003368 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3369 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003370 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003371 if (v == NULL)
3372 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003373 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3374 PyErr_Format(PyExc_TypeError,
3375 "encoder did not return a string/unicode object "
3376 "(type=%.400s)",
3377 v->ob_type->tp_name);
3378 Py_DECREF(v);
3379 return NULL;
3380 }
3381 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003382
3383 onError:
3384 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003385}
3386
3387
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003388PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003389"S.decode([encoding[,errors]]) -> object\n\
3390\n\
3391Decodes S using the codec registered for encoding. encoding defaults\n\
3392to the default encoding. errors may be given to set a different error\n\
3393handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003394a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3395as well as any other name registerd with codecs.register_error that is\n\
3396able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003397
3398static PyObject *
3399string_decode(PyStringObject *self, PyObject *args)
3400{
3401 char *encoding = NULL;
3402 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003403 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003404
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003405 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3406 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003407 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003408 if (v == NULL)
3409 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003410 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3411 PyErr_Format(PyExc_TypeError,
3412 "decoder did not return a string/unicode object "
3413 "(type=%.400s)",
3414 v->ob_type->tp_name);
3415 Py_DECREF(v);
3416 return NULL;
3417 }
3418 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003419
3420 onError:
3421 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003422}
3423
3424
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003425PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003426"S.expandtabs([tabsize]) -> string\n\
3427\n\
3428Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003429If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430
3431static PyObject*
3432string_expandtabs(PyStringObject *self, PyObject *args)
3433{
3434 const char *e, *p;
3435 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003436 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003437 PyObject *u;
3438 int tabsize = 8;
3439
3440 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3441 return NULL;
3442
Thomas Wouters7e474022000-07-16 12:04:32 +00003443 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444 i = j = 0;
3445 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3446 for (p = PyString_AS_STRING(self); p < e; p++)
3447 if (*p == '\t') {
3448 if (tabsize > 0)
3449 j += tabsize - (j % tabsize);
3450 }
3451 else {
3452 j++;
3453 if (*p == '\n' || *p == '\r') {
3454 i += j;
3455 j = 0;
3456 }
3457 }
3458
3459 /* Second pass: create output string and fill it */
3460 u = PyString_FromStringAndSize(NULL, i + j);
3461 if (!u)
3462 return NULL;
3463
3464 j = 0;
3465 q = PyString_AS_STRING(u);
3466
3467 for (p = PyString_AS_STRING(self); p < e; p++)
3468 if (*p == '\t') {
3469 if (tabsize > 0) {
3470 i = tabsize - (j % tabsize);
3471 j += i;
3472 while (i--)
3473 *q++ = ' ';
3474 }
3475 }
3476 else {
3477 j++;
3478 *q++ = *p;
3479 if (*p == '\n' || *p == '\r')
3480 j = 0;
3481 }
3482
3483 return u;
3484}
3485
Tim Peters8fa5dd02001-09-12 02:18:30 +00003486static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003487pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003488{
3489 PyObject *u;
3490
3491 if (left < 0)
3492 left = 0;
3493 if (right < 0)
3494 right = 0;
3495
Tim Peters8fa5dd02001-09-12 02:18:30 +00003496 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003497 Py_INCREF(self);
3498 return (PyObject *)self;
3499 }
3500
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003501 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502 left + PyString_GET_SIZE(self) + right);
3503 if (u) {
3504 if (left)
3505 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003506 memcpy(PyString_AS_STRING(u) + left,
3507 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003508 PyString_GET_SIZE(self));
3509 if (right)
3510 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3511 fill, right);
3512 }
3513
3514 return u;
3515}
3516
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003517PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003518"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003519"\n"
3520"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003521"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003522
3523static PyObject *
3524string_ljust(PyStringObject *self, PyObject *args)
3525{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003526 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003527 char fillchar = ' ';
3528
Thomas Wouters4abb3662006-04-19 14:50:15 +00003529 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003530 return NULL;
3531
Tim Peters8fa5dd02001-09-12 02:18:30 +00003532 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003533 Py_INCREF(self);
3534 return (PyObject*) self;
3535 }
3536
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003537 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003538}
3539
3540
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003541PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003542"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003543"\n"
3544"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003545"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546
3547static PyObject *
3548string_rjust(PyStringObject *self, PyObject *args)
3549{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003550 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003551 char fillchar = ' ';
3552
Thomas Wouters4abb3662006-04-19 14:50:15 +00003553 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003554 return NULL;
3555
Tim Peters8fa5dd02001-09-12 02:18:30 +00003556 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557 Py_INCREF(self);
3558 return (PyObject*) self;
3559 }
3560
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003561 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003562}
3563
3564
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003565PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003566"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003567"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003568"Return S centered in a string of length width. Padding is\n"
3569"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003570
3571static PyObject *
3572string_center(PyStringObject *self, PyObject *args)
3573{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003574 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003575 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003576 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003577
Thomas Wouters4abb3662006-04-19 14:50:15 +00003578 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003579 return NULL;
3580
Tim Peters8fa5dd02001-09-12 02:18:30 +00003581 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003582 Py_INCREF(self);
3583 return (PyObject*) self;
3584 }
3585
3586 marg = width - PyString_GET_SIZE(self);
3587 left = marg / 2 + (marg & width & 1);
3588
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003589 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003590}
3591
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003592PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003593"S.zfill(width) -> string\n"
3594"\n"
3595"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003596"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003597
3598static PyObject *
3599string_zfill(PyStringObject *self, PyObject *args)
3600{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003601 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003602 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003603 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003604 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003605
Thomas Wouters4abb3662006-04-19 14:50:15 +00003606 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003607 return NULL;
3608
3609 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003610 if (PyString_CheckExact(self)) {
3611 Py_INCREF(self);
3612 return (PyObject*) self;
3613 }
3614 else
3615 return PyString_FromStringAndSize(
3616 PyString_AS_STRING(self),
3617 PyString_GET_SIZE(self)
3618 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003619 }
3620
3621 fill = width - PyString_GET_SIZE(self);
3622
3623 s = pad(self, fill, 0, '0');
3624
3625 if (s == NULL)
3626 return NULL;
3627
3628 p = PyString_AS_STRING(s);
3629 if (p[fill] == '+' || p[fill] == '-') {
3630 /* move sign to beginning of string */
3631 p[0] = p[fill];
3632 p[fill] = '0';
3633 }
3634
3635 return (PyObject*) s;
3636}
3637
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003638PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003639"S.isspace() -> bool\n\
3640\n\
3641Return True if all characters in S are whitespace\n\
3642and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643
3644static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003645string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646{
Fred Drakeba096332000-07-09 07:04:36 +00003647 register const unsigned char *p
3648 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003649 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651 /* Shortcut for single character strings */
3652 if (PyString_GET_SIZE(self) == 1 &&
3653 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003654 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003656 /* Special case for empty strings */
3657 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003658 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003659
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660 e = p + PyString_GET_SIZE(self);
3661 for (; p < e; p++) {
3662 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003663 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666}
3667
3668
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003669PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003671\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003672Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003673and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003674
3675static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003676string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003677{
Fred Drakeba096332000-07-09 07:04:36 +00003678 register const unsigned char *p
3679 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003680 register const unsigned char *e;
3681
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003682 /* Shortcut for single character strings */
3683 if (PyString_GET_SIZE(self) == 1 &&
3684 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003685 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003686
3687 /* Special case for empty strings */
3688 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003689 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003690
3691 e = p + PyString_GET_SIZE(self);
3692 for (; p < e; p++) {
3693 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003695 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003696 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003697}
3698
3699
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003700PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003702\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003703Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003704and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003705
3706static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003707string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003708{
Fred Drakeba096332000-07-09 07:04:36 +00003709 register const unsigned char *p
3710 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003711 register const unsigned char *e;
3712
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003713 /* Shortcut for single character strings */
3714 if (PyString_GET_SIZE(self) == 1 &&
3715 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003717
3718 /* Special case for empty strings */
3719 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003721
3722 e = p + PyString_GET_SIZE(self);
3723 for (; p < e; p++) {
3724 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003725 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003726 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003728}
3729
3730
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003731PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003732"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003734Return True if all characters in S are digits\n\
3735and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736
3737static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003738string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739{
Fred Drakeba096332000-07-09 07:04:36 +00003740 register const unsigned char *p
3741 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003742 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743
Guido van Rossum4c08d552000-03-10 22:55:18 +00003744 /* Shortcut for single character strings */
3745 if (PyString_GET_SIZE(self) == 1 &&
3746 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003747 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003749 /* Special case for empty strings */
3750 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003751 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003752
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753 e = p + PyString_GET_SIZE(self);
3754 for (; p < e; p++) {
3755 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003756 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003758 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003759}
3760
3761
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003762PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003763"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003765Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003766at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767
3768static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003769string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770{
Fred Drakeba096332000-07-09 07:04:36 +00003771 register const unsigned char *p
3772 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003773 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774 int cased;
3775
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776 /* Shortcut for single character strings */
3777 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003778 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003780 /* Special case for empty strings */
3781 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003782 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003783
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784 e = p + PyString_GET_SIZE(self);
3785 cased = 0;
3786 for (; p < e; p++) {
3787 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003788 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789 else if (!cased && islower(*p))
3790 cased = 1;
3791 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003792 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003793}
3794
3795
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003796PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003797"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003798\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003799Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003800at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003801
3802static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003803string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804{
Fred Drakeba096332000-07-09 07:04:36 +00003805 register const unsigned char *p
3806 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003807 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808 int cased;
3809
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 /* Shortcut for single character strings */
3811 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003812 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003814 /* Special case for empty strings */
3815 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003816 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003817
Guido van Rossum4c08d552000-03-10 22:55:18 +00003818 e = p + PyString_GET_SIZE(self);
3819 cased = 0;
3820 for (; p < e; p++) {
3821 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003822 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003823 else if (!cased && isupper(*p))
3824 cased = 1;
3825 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003826 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003827}
3828
3829
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003830PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003831"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003832\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003833Return True if S is a titlecased string and there is at least one\n\
3834character in S, i.e. uppercase characters may only follow uncased\n\
3835characters and lowercase characters only cased ones. Return False\n\
3836otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003837
3838static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003839string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003840{
Fred Drakeba096332000-07-09 07:04:36 +00003841 register const unsigned char *p
3842 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003843 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003844 int cased, previous_is_cased;
3845
Guido van Rossum4c08d552000-03-10 22:55:18 +00003846 /* Shortcut for single character strings */
3847 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003848 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003849
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003850 /* Special case for empty strings */
3851 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003852 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003853
Guido van Rossum4c08d552000-03-10 22:55:18 +00003854 e = p + PyString_GET_SIZE(self);
3855 cased = 0;
3856 previous_is_cased = 0;
3857 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003858 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003859
3860 if (isupper(ch)) {
3861 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003862 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003863 previous_is_cased = 1;
3864 cased = 1;
3865 }
3866 else if (islower(ch)) {
3867 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003868 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003869 previous_is_cased = 1;
3870 cased = 1;
3871 }
3872 else
3873 previous_is_cased = 0;
3874 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003875 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003876}
3877
3878
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003879PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003880"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003881\n\
3882Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003883Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003884is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003885
Guido van Rossum4c08d552000-03-10 22:55:18 +00003886static PyObject*
3887string_splitlines(PyStringObject *self, PyObject *args)
3888{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003889 register Py_ssize_t i;
3890 register Py_ssize_t j;
3891 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003892 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003893 PyObject *list;
3894 PyObject *str;
3895 char *data;
3896
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003897 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003898 return NULL;
3899
3900 data = PyString_AS_STRING(self);
3901 len = PyString_GET_SIZE(self);
3902
Guido van Rossum4c08d552000-03-10 22:55:18 +00003903 list = PyList_New(0);
3904 if (!list)
3905 goto onError;
3906
3907 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003908 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003909
Guido van Rossum4c08d552000-03-10 22:55:18 +00003910 /* Find a line and append it */
3911 while (i < len && data[i] != '\n' && data[i] != '\r')
3912 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003913
3914 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003915 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003916 if (i < len) {
3917 if (data[i] == '\r' && i + 1 < len &&
3918 data[i+1] == '\n')
3919 i += 2;
3920 else
3921 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003922 if (keepends)
3923 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003924 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003925 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003926 j = i;
3927 }
3928 if (j < len) {
3929 SPLIT_APPEND(data, j, len);
3930 }
3931
3932 return list;
3933
3934 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003935 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003936 return NULL;
3937}
3938
3939#undef SPLIT_APPEND
3940
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003941static PyObject *
3942string_getnewargs(PyStringObject *v)
3943{
3944 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3945}
3946
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003947
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003948static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003949string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003950 /* Counterparts of the obsolete stropmodule functions; except
3951 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003952 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3953 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003954 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003955 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3956 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003957 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3958 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3959 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3960 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3961 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3962 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3963 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003964 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3965 capitalize__doc__},
3966 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3967 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3968 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003969 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003970 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3971 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3972 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3973 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3974 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3975 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3976 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3977 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3978 startswith__doc__},
3979 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3980 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3981 swapcase__doc__},
3982 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3983 translate__doc__},
3984 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3985 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3986 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3987 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3988 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3989 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3990 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3991 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3992 expandtabs__doc__},
3993 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3994 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003995 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003996 {NULL, NULL} /* sentinel */
3997};
3998
Jeremy Hylton938ace62002-07-17 16:30:39 +00003999static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00004000str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4001
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004002static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004003string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004004{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004005 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004006 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004007
Guido van Rossumae960af2001-08-30 03:11:59 +00004008 if (type != &PyString_Type)
4009 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004010 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4011 return NULL;
4012 if (x == NULL)
4013 return PyString_FromString("");
4014 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004015}
4016
Guido van Rossumae960af2001-08-30 03:11:59 +00004017static PyObject *
4018str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4019{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004020 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004021 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004022
4023 assert(PyType_IsSubtype(type, &PyString_Type));
4024 tmp = string_new(&PyString_Type, args, kwds);
4025 if (tmp == NULL)
4026 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004027 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004028 n = PyString_GET_SIZE(tmp);
4029 pnew = type->tp_alloc(type, n);
4030 if (pnew != NULL) {
4031 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004032 ((PyStringObject *)pnew)->ob_shash =
4033 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004034 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004035 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004036 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004037 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004038}
4039
Guido van Rossumcacfc072002-05-24 19:01:59 +00004040static PyObject *
4041basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4042{
4043 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004044 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004045 return NULL;
4046}
4047
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004048static PyObject *
4049string_mod(PyObject *v, PyObject *w)
4050{
4051 if (!PyString_Check(v)) {
4052 Py_INCREF(Py_NotImplemented);
4053 return Py_NotImplemented;
4054 }
4055 return PyString_Format(v, w);
4056}
4057
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004058PyDoc_STRVAR(basestring_doc,
4059"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004060
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004061static PyNumberMethods string_as_number = {
4062 0, /*nb_add*/
4063 0, /*nb_subtract*/
4064 0, /*nb_multiply*/
4065 0, /*nb_divide*/
4066 string_mod, /*nb_remainder*/
4067};
4068
4069
Guido van Rossumcacfc072002-05-24 19:01:59 +00004070PyTypeObject PyBaseString_Type = {
4071 PyObject_HEAD_INIT(&PyType_Type)
4072 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004073 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004074 0,
4075 0,
4076 0, /* tp_dealloc */
4077 0, /* tp_print */
4078 0, /* tp_getattr */
4079 0, /* tp_setattr */
4080 0, /* tp_compare */
4081 0, /* tp_repr */
4082 0, /* tp_as_number */
4083 0, /* tp_as_sequence */
4084 0, /* tp_as_mapping */
4085 0, /* tp_hash */
4086 0, /* tp_call */
4087 0, /* tp_str */
4088 0, /* tp_getattro */
4089 0, /* tp_setattro */
4090 0, /* tp_as_buffer */
4091 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4092 basestring_doc, /* tp_doc */
4093 0, /* tp_traverse */
4094 0, /* tp_clear */
4095 0, /* tp_richcompare */
4096 0, /* tp_weaklistoffset */
4097 0, /* tp_iter */
4098 0, /* tp_iternext */
4099 0, /* tp_methods */
4100 0, /* tp_members */
4101 0, /* tp_getset */
4102 &PyBaseObject_Type, /* tp_base */
4103 0, /* tp_dict */
4104 0, /* tp_descr_get */
4105 0, /* tp_descr_set */
4106 0, /* tp_dictoffset */
4107 0, /* tp_init */
4108 0, /* tp_alloc */
4109 basestring_new, /* tp_new */
4110 0, /* tp_free */
4111};
4112
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004113PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004114"str(object) -> string\n\
4115\n\
4116Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004117If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004118
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004119PyTypeObject PyString_Type = {
4120 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004121 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004122 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004123 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004124 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004125 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004126 (printfunc)string_print, /* tp_print */
4127 0, /* tp_getattr */
4128 0, /* tp_setattr */
4129 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004130 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004131 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004132 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004133 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004134 (hashfunc)string_hash, /* tp_hash */
4135 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004136 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004137 PyObject_GenericGetAttr, /* tp_getattro */
4138 0, /* tp_setattro */
4139 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004140 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004141 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004142 string_doc, /* tp_doc */
4143 0, /* tp_traverse */
4144 0, /* tp_clear */
4145 (richcmpfunc)string_richcompare, /* tp_richcompare */
4146 0, /* tp_weaklistoffset */
4147 0, /* tp_iter */
4148 0, /* tp_iternext */
4149 string_methods, /* tp_methods */
4150 0, /* tp_members */
4151 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004152 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004153 0, /* tp_dict */
4154 0, /* tp_descr_get */
4155 0, /* tp_descr_set */
4156 0, /* tp_dictoffset */
4157 0, /* tp_init */
4158 0, /* tp_alloc */
4159 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004160 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004161};
4162
4163void
Fred Drakeba096332000-07-09 07:04:36 +00004164PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004165{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004166 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004167 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004168 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004169 if (w == NULL || !PyString_Check(*pv)) {
4170 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004171 *pv = NULL;
4172 return;
4173 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004174 v = string_concat((PyStringObject *) *pv, w);
4175 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004176 *pv = v;
4177}
4178
Guido van Rossum013142a1994-08-30 08:19:36 +00004179void
Fred Drakeba096332000-07-09 07:04:36 +00004180PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004182 PyString_Concat(pv, w);
4183 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004184}
4185
4186
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004187/* The following function breaks the notion that strings are immutable:
4188 it changes the size of a string. We get away with this only if there
4189 is only one module referencing the object. You can also think of it
4190 as creating a new string object and destroying the old one, only
4191 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004192 already be known to some other part of the code...
4193 Note that if there's not enough memory to resize the string, the original
4194 string object at *pv is deallocated, *pv is set to NULL, an "out of
4195 memory" exception is set, and -1 is returned. Else (on success) 0 is
4196 returned, and the value in *pv may or may not be the same as on input.
4197 As always, an extra byte is allocated for a trailing \0 byte (newsize
4198 does *not* include that), and a trailing \0 byte is stored.
4199*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004200
4201int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004202_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004203{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004204 register PyObject *v;
4205 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004206 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004207 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4208 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004209 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004210 Py_DECREF(v);
4211 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004212 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004213 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004214 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004215 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004216 _Py_ForgetReference(v);
4217 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004218 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004219 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004220 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004221 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004222 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004223 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004224 _Py_NewReference(*pv);
4225 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004226 sv->ob_size = newsize;
4227 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004228 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004229 return 0;
4230}
Guido van Rossume5372401993-03-16 12:15:04 +00004231
4232/* Helpers for formatstring */
4233
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004234static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004235getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004236{
Thomas Wouters977485d2006-02-16 15:59:12 +00004237 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004238 if (argidx < arglen) {
4239 (*p_argidx)++;
4240 if (arglen < 0)
4241 return args;
4242 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004243 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004244 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004245 PyErr_SetString(PyExc_TypeError,
4246 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004247 return NULL;
4248}
4249
/* Format flag bits, one per conversion flag character. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
4262
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004263static int
Fred Drakeba096332000-07-09 07:04:36 +00004264formatfloat(char *buf, size_t buflen, int flags,
4265 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004266{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004267 /* fmt = '%#.' + `prec` + `type`
4268 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004269 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004270 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004271 x = PyFloat_AsDouble(v);
4272 if (x == -1.0 && PyErr_Occurred()) {
4273 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004274 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004275 }
Guido van Rossume5372401993-03-16 12:15:04 +00004276 if (prec < 0)
4277 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004278 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4279 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004280 /* Worst case length calc to ensure no buffer overrun:
4281
4282 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004283 fmt = %#.<prec>g
4284 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004285 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004286 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004287
4288 'f' formats:
4289 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4290 len = 1 + 50 + 1 + prec = 52 + prec
4291
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004292 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004293 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004294
4295 */
4296 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4297 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004298 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004299 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004300 return -1;
4301 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004302 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4303 (flags&F_ALT) ? "#" : "",
4304 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004305 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004306 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004307}
4308
Tim Peters38fd5b62000-09-21 05:43:11 +00004309/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4310 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4311 * Python's regular ints.
4312 * Return value: a new PyString*, or NULL if error.
4313 * . *pbuf is set to point into it,
4314 * *plen set to the # of chars following that.
4315 * Caller must decref it when done using pbuf.
4316 * The string starting at *pbuf is of the form
4317 * "-"? ("0x" | "0X")? digit+
4318 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004319 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004320 * There will be at least prec digits, zero-filled on the left if
4321 * necessary to get that many.
4322 * val object to be converted
4323 * flags bitmask of format flags; only F_ALT is looked at
4324 * prec minimum number of digits; 0-fill on left if needed
4325 * type a character in [duoxX]; u acts the same as d
4326 *
4327 * CAUTION: o, x and X conversions on regular ints can never
4328 * produce a '-' sign, but can for Python's unbounded ints.
4329 */
4330PyObject*
4331_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4332 char **pbuf, int *plen)
4333{
4334 PyObject *result = NULL;
4335 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004336 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004337 int sign; /* 1 if '-', else 0 */
4338 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004339 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004340 int numdigits; /* len == numnondigits + numdigits */
4341 int numnondigits = 0;
4342
4343 switch (type) {
4344 case 'd':
4345 case 'u':
4346 result = val->ob_type->tp_str(val);
4347 break;
4348 case 'o':
4349 result = val->ob_type->tp_as_number->nb_oct(val);
4350 break;
4351 case 'x':
4352 case 'X':
4353 numnondigits = 2;
4354 result = val->ob_type->tp_as_number->nb_hex(val);
4355 break;
4356 default:
4357 assert(!"'type' not in [duoxX]");
4358 }
4359 if (!result)
4360 return NULL;
4361
4362 /* To modify the string in-place, there can only be one reference. */
4363 if (result->ob_refcnt != 1) {
4364 PyErr_BadInternalCall();
4365 return NULL;
4366 }
4367 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004368 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004369 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004370 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4371 return NULL;
4372 }
4373 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004374 if (buf[len-1] == 'L') {
4375 --len;
4376 buf[len] = '\0';
4377 }
4378 sign = buf[0] == '-';
4379 numnondigits += sign;
4380 numdigits = len - numnondigits;
4381 assert(numdigits > 0);
4382
Tim Petersfff53252001-04-12 18:38:48 +00004383 /* Get rid of base marker unless F_ALT */
4384 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004385 /* Need to skip 0x, 0X or 0. */
4386 int skipped = 0;
4387 switch (type) {
4388 case 'o':
4389 assert(buf[sign] == '0');
4390 /* If 0 is only digit, leave it alone. */
4391 if (numdigits > 1) {
4392 skipped = 1;
4393 --numdigits;
4394 }
4395 break;
4396 case 'x':
4397 case 'X':
4398 assert(buf[sign] == '0');
4399 assert(buf[sign + 1] == 'x');
4400 skipped = 2;
4401 numnondigits -= 2;
4402 break;
4403 }
4404 if (skipped) {
4405 buf += skipped;
4406 len -= skipped;
4407 if (sign)
4408 buf[0] = '-';
4409 }
4410 assert(len == numnondigits + numdigits);
4411 assert(numdigits > 0);
4412 }
4413
4414 /* Fill with leading zeroes to meet minimum width. */
4415 if (prec > numdigits) {
4416 PyObject *r1 = PyString_FromStringAndSize(NULL,
4417 numnondigits + prec);
4418 char *b1;
4419 if (!r1) {
4420 Py_DECREF(result);
4421 return NULL;
4422 }
4423 b1 = PyString_AS_STRING(r1);
4424 for (i = 0; i < numnondigits; ++i)
4425 *b1++ = *buf++;
4426 for (i = 0; i < prec - numdigits; i++)
4427 *b1++ = '0';
4428 for (i = 0; i < numdigits; i++)
4429 *b1++ = *buf++;
4430 *b1 = '\0';
4431 Py_DECREF(result);
4432 result = r1;
4433 buf = PyString_AS_STRING(result);
4434 len = numnondigits + prec;
4435 }
4436
4437 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004438 if (type == 'X') {
4439 /* Need to convert all lower case letters to upper case.
4440 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004441 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004442 if (buf[i] >= 'a' && buf[i] <= 'x')
4443 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004444 }
4445 *pbuf = buf;
4446 *plen = len;
4447 return result;
4448}
4449
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004450static int
Fred Drakeba096332000-07-09 07:04:36 +00004451formatint(char *buf, size_t buflen, int flags,
4452 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004453{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004454 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004455 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4456 + 1 + 1 = 24 */
4457 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004458 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004459 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004460
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004461 x = PyInt_AsLong(v);
4462 if (x == -1 && PyErr_Occurred()) {
4463 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004464 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004465 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004466 if (x < 0 && type == 'u') {
4467 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004468 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004469 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4470 sign = "-";
4471 else
4472 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004473 if (prec < 0)
4474 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004475
4476 if ((flags & F_ALT) &&
4477 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004478 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004479 * of issues that cause pain:
4480 * - when 0 is being converted, the C standard leaves off
4481 * the '0x' or '0X', which is inconsistent with other
4482 * %#x/%#X conversions and inconsistent with Python's
4483 * hex() function
4484 * - there are platforms that violate the standard and
4485 * convert 0 with the '0x' or '0X'
4486 * (Metrowerks, Compaq Tru64)
4487 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004488 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004489 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004490 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004491 * We can achieve the desired consistency by inserting our
4492 * own '0x' or '0X' prefix, and substituting %x/%X in place
4493 * of %#x/%#X.
4494 *
4495 * Note that this is the same approach as used in
4496 * formatint() in unicodeobject.c
4497 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004498 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4499 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004500 }
4501 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004502 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4503 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004504 prec, type);
4505 }
4506
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004507 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4508 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004509 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004510 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004511 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004512 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004513 return -1;
4514 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004515 if (sign[0])
4516 PyOS_snprintf(buf, buflen, fmt, -x);
4517 else
4518 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004519 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004520}
4521
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004522static int
Fred Drakeba096332000-07-09 07:04:36 +00004523formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004524{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004525 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004526 if (PyString_Check(v)) {
4527 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004528 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004529 }
4530 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004531 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004532 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004533 }
4534 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004535 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004536}
4537
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand in any expression (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004547
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004548PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004549PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004550{
4551 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004552 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004553 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004554 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004555 PyObject *result, *orig_args;
4556#ifdef Py_USING_UNICODE
4557 PyObject *v, *w;
4558#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004559 PyObject *dict = NULL;
4560 if (format == NULL || !PyString_Check(format) || args == NULL) {
4561 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004562 return NULL;
4563 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004564 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004565 fmt = PyString_AS_STRING(format);
4566 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004567 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004568 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004569 if (result == NULL)
4570 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004571 res = PyString_AsString(result);
4572 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004573 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004574 argidx = 0;
4575 }
4576 else {
4577 arglen = -1;
4578 argidx = -2;
4579 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004580 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4581 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004582 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004583 while (--fmtcnt >= 0) {
4584 if (*fmt != '%') {
4585 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004586 rescnt = fmtcnt + 100;
4587 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004588 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004589 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004590 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004591 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004592 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004593 }
4594 *res++ = *fmt++;
4595 }
4596 else {
4597 /* Got a format specifier */
4598 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004599 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004600 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004601 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004602 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004603 PyObject *v = NULL;
4604 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004605 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004606 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004607 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004608 char formatbuf[FORMATBUFLEN];
4609 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004610#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004611 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004612 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004613#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004614
Guido van Rossumda9c2711996-12-05 21:58:58 +00004615 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004616 if (*fmt == '(') {
4617 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004618 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004619 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004620 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004621
4622 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004623 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004624 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004625 goto error;
4626 }
4627 ++fmt;
4628 --fmtcnt;
4629 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004630 /* Skip over balanced parentheses */
4631 while (pcount > 0 && --fmtcnt >= 0) {
4632 if (*fmt == ')')
4633 --pcount;
4634 else if (*fmt == '(')
4635 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004636 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004637 }
4638 keylen = fmt - keystart - 1;
4639 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004640 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004641 "incomplete format key");
4642 goto error;
4643 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004644 key = PyString_FromStringAndSize(keystart,
4645 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004646 if (key == NULL)
4647 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004648 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004649 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004650 args_owned = 0;
4651 }
4652 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004653 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004654 if (args == NULL) {
4655 goto error;
4656 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004657 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004658 arglen = -1;
4659 argidx = -2;
4660 }
Guido van Rossume5372401993-03-16 12:15:04 +00004661 while (--fmtcnt >= 0) {
4662 switch (c = *fmt++) {
4663 case '-': flags |= F_LJUST; continue;
4664 case '+': flags |= F_SIGN; continue;
4665 case ' ': flags |= F_BLANK; continue;
4666 case '#': flags |= F_ALT; continue;
4667 case '0': flags |= F_ZERO; continue;
4668 }
4669 break;
4670 }
4671 if (c == '*') {
4672 v = getnextarg(args, arglen, &argidx);
4673 if (v == NULL)
4674 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004675 if (!PyInt_Check(v)) {
4676 PyErr_SetString(PyExc_TypeError,
4677 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004678 goto error;
4679 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004680 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004681 if (width < 0) {
4682 flags |= F_LJUST;
4683 width = -width;
4684 }
Guido van Rossume5372401993-03-16 12:15:04 +00004685 if (--fmtcnt >= 0)
4686 c = *fmt++;
4687 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004688 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004689 width = c - '0';
4690 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004691 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004692 if (!isdigit(c))
4693 break;
4694 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004695 PyErr_SetString(
4696 PyExc_ValueError,
4697 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004698 goto error;
4699 }
4700 width = width*10 + (c - '0');
4701 }
4702 }
4703 if (c == '.') {
4704 prec = 0;
4705 if (--fmtcnt >= 0)
4706 c = *fmt++;
4707 if (c == '*') {
4708 v = getnextarg(args, arglen, &argidx);
4709 if (v == NULL)
4710 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004711 if (!PyInt_Check(v)) {
4712 PyErr_SetString(
4713 PyExc_TypeError,
4714 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004715 goto error;
4716 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004717 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004718 if (prec < 0)
4719 prec = 0;
4720 if (--fmtcnt >= 0)
4721 c = *fmt++;
4722 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004723 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004724 prec = c - '0';
4725 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004726 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004727 if (!isdigit(c))
4728 break;
4729 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004730 PyErr_SetString(
4731 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004732 "prec too big");
4733 goto error;
4734 }
4735 prec = prec*10 + (c - '0');
4736 }
4737 }
4738 } /* prec */
4739 if (fmtcnt >= 0) {
4740 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004741 if (--fmtcnt >= 0)
4742 c = *fmt++;
4743 }
4744 }
4745 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004746 PyErr_SetString(PyExc_ValueError,
4747 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004748 goto error;
4749 }
4750 if (c != '%') {
4751 v = getnextarg(args, arglen, &argidx);
4752 if (v == NULL)
4753 goto error;
4754 }
4755 sign = 0;
4756 fill = ' ';
4757 switch (c) {
4758 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004759 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004760 len = 1;
4761 break;
4762 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004763#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004764 if (PyUnicode_Check(v)) {
4765 fmt = fmt_start;
4766 argidx = argidx_start;
4767 goto unicode;
4768 }
Georg Brandld45014b2005-10-01 17:06:00 +00004769#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004770 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004771#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004772 if (temp != NULL && PyUnicode_Check(temp)) {
4773 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004774 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004775 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004776 goto unicode;
4777 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004778#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004779 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004780 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004781 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004782 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004783 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004784 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004785 if (!PyString_Check(temp)) {
4786 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004787 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004788 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004789 goto error;
4790 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004791 pbuf = PyString_AS_STRING(temp);
4792 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004793 if (prec >= 0 && len > prec)
4794 len = prec;
4795 break;
4796 case 'i':
4797 case 'd':
4798 case 'u':
4799 case 'o':
4800 case 'x':
4801 case 'X':
4802 if (c == 'i')
4803 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004804 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004805 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004806 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004807 prec, c, &pbuf, &ilen);
4808 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004809 if (!temp)
4810 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004811 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004812 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004813 else {
4814 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004815 len = formatint(pbuf,
4816 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004817 flags, prec, c, v);
4818 if (len < 0)
4819 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004820 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004821 }
4822 if (flags & F_ZERO)
4823 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004824 break;
4825 case 'e':
4826 case 'E':
4827 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004828 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004829 case 'g':
4830 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004831 if (c == 'F')
4832 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004833 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004834 len = formatfloat(pbuf, sizeof(formatbuf),
4835 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004836 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004837 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004838 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004839 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004840 fill = '0';
4841 break;
4842 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004843#ifdef Py_USING_UNICODE
4844 if (PyUnicode_Check(v)) {
4845 fmt = fmt_start;
4846 argidx = argidx_start;
4847 goto unicode;
4848 }
4849#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004850 pbuf = formatbuf;
4851 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004852 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004853 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004854 break;
4855 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004856 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004857 "unsupported format character '%c' (0x%x) "
4858 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004859 c, c,
4860 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004861 goto error;
4862 }
4863 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004864 if (*pbuf == '-' || *pbuf == '+') {
4865 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004866 len--;
4867 }
4868 else if (flags & F_SIGN)
4869 sign = '+';
4870 else if (flags & F_BLANK)
4871 sign = ' ';
4872 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004873 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004874 }
4875 if (width < len)
4876 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004877 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004878 reslen -= rescnt;
4879 rescnt = width + fmtcnt + 100;
4880 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004881 if (reslen < 0) {
4882 Py_DECREF(result);
4883 return PyErr_NoMemory();
4884 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004885 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004886 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004887 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004888 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004889 }
4890 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004891 if (fill != ' ')
4892 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004893 rescnt--;
4894 if (width > len)
4895 width--;
4896 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004897 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4898 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004899 assert(pbuf[1] == c);
4900 if (fill != ' ') {
4901 *res++ = *pbuf++;
4902 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004903 }
Tim Petersfff53252001-04-12 18:38:48 +00004904 rescnt -= 2;
4905 width -= 2;
4906 if (width < 0)
4907 width = 0;
4908 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004909 }
4910 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004911 do {
4912 --rescnt;
4913 *res++ = fill;
4914 } while (--width > len);
4915 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004916 if (fill == ' ') {
4917 if (sign)
4918 *res++ = sign;
4919 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004920 (c == 'x' || c == 'X')) {
4921 assert(pbuf[0] == '0');
4922 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004923 *res++ = *pbuf++;
4924 *res++ = *pbuf++;
4925 }
4926 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004927 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004928 res += len;
4929 rescnt -= len;
4930 while (--width >= len) {
4931 --rescnt;
4932 *res++ = ' ';
4933 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004934 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004935 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004936 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004937 goto error;
4938 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004939 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004940 } /* '%' */
4941 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004942 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004943 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004944 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004945 goto error;
4946 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004947 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004948 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004949 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004950 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004951 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004952
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004953#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004954 unicode:
4955 if (args_owned) {
4956 Py_DECREF(args);
4957 args_owned = 0;
4958 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004959 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004960 if (PyTuple_Check(orig_args) && argidx > 0) {
4961 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004962 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004963 v = PyTuple_New(n);
4964 if (v == NULL)
4965 goto error;
4966 while (--n >= 0) {
4967 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4968 Py_INCREF(w);
4969 PyTuple_SET_ITEM(v, n, w);
4970 }
4971 args = v;
4972 } else {
4973 Py_INCREF(orig_args);
4974 args = orig_args;
4975 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004976 args_owned = 1;
4977 /* Take what we have of the result and let the Unicode formatting
4978 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004979 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004980 if (_PyString_Resize(&result, rescnt))
4981 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004982 fmtcnt = PyString_GET_SIZE(format) - \
4983 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004984 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4985 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004986 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004987 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004988 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004989 if (v == NULL)
4990 goto error;
4991 /* Paste what we have (result) to what the Unicode formatting
4992 function returned (v) and return the result (or error) */
4993 w = PyUnicode_Concat(result, v);
4994 Py_DECREF(result);
4995 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004996 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004997 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004998#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004999
Guido van Rossume5372401993-03-16 12:15:04 +00005000 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005001 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005002 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005003 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005004 }
Guido van Rossume5372401993-03-16 12:15:04 +00005005 return NULL;
5006}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005007
Guido van Rossum2a61e741997-01-18 07:55:05 +00005008void
Fred Drakeba096332000-07-09 07:04:36 +00005009PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005010{
5011 register PyStringObject *s = (PyStringObject *)(*p);
5012 PyObject *t;
5013 if (s == NULL || !PyString_Check(s))
5014 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005015 /* If it's a string subclass, we don't really know what putting
5016 it in the interned dict might do. */
5017 if (!PyString_CheckExact(s))
5018 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005019 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005020 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005021 if (interned == NULL) {
5022 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005023 if (interned == NULL) {
5024 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005025 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005026 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005027 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005028 t = PyDict_GetItem(interned, (PyObject *)s);
5029 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005030 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005031 Py_DECREF(*p);
5032 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005033 return;
5034 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005035
Armin Rigo79f7ad22004-08-07 19:27:39 +00005036 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005037 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005038 return;
5039 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005040 /* The two references in interned are not counted by refcnt.
5041 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00005042 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005043 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005044}
5045
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005046void
5047PyString_InternImmortal(PyObject **p)
5048{
5049 PyString_InternInPlace(p);
5050 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5051 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5052 Py_INCREF(*p);
5053 }
5054}
5055
Guido van Rossum2a61e741997-01-18 07:55:05 +00005056
5057PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005058PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005059{
5060 PyObject *s = PyString_FromString(cp);
5061 if (s == NULL)
5062 return NULL;
5063 PyString_InternInPlace(&s);
5064 return s;
5065}
5066
Guido van Rossum8cf04761997-08-02 02:57:45 +00005067void
Fred Drakeba096332000-07-09 07:04:36 +00005068PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005069{
5070 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005071 for (i = 0; i < UCHAR_MAX + 1; i++) {
5072 Py_XDECREF(characters[i]);
5073 characters[i] = NULL;
5074 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005075 Py_XDECREF(nullstring);
5076 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005077}
Barry Warsawa903ad982001-02-23 16:40:48 +00005078
Barry Warsawa903ad982001-02-23 16:40:48 +00005079void _Py_ReleaseInternedStrings(void)
5080{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005081 PyObject *keys;
5082 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005083 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005084
5085 if (interned == NULL || !PyDict_Check(interned))
5086 return;
5087 keys = PyDict_Keys(interned);
5088 if (keys == NULL || !PyList_Check(keys)) {
5089 PyErr_Clear();
5090 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005091 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005092
5093 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5094 detector, interned strings are not forcibly deallocated; rather, we
5095 give them their stolen references back, and then clear and DECREF
5096 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005097
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005098 fprintf(stderr, "releasing interned strings\n");
5099 n = PyList_GET_SIZE(keys);
5100 for (i = 0; i < n; i++) {
5101 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5102 switch (s->ob_sstate) {
5103 case SSTATE_NOT_INTERNED:
5104 /* XXX Shouldn't happen */
5105 break;
5106 case SSTATE_INTERNED_IMMORTAL:
5107 s->ob_refcnt += 1;
5108 break;
5109 case SSTATE_INTERNED_MORTAL:
5110 s->ob_refcnt += 2;
5111 break;
5112 default:
5113 Py_FatalError("Inconsistent interned string state.");
5114 }
5115 s->ob_sstate = SSTATE_NOT_INTERNED;
5116 }
5117 Py_DECREF(keys);
5118 PyDict_Clear(interned);
5119 Py_DECREF(interned);
5120 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005121}