blob: 95ade513b1fea5d96f8d6f73ad82f82dd4b5e82e [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Fredrik Lundhaf722372006-05-25 17:55:31 +00008#undef USE_INLINE /* XXX - set via configure? */
9
10#if defined(_MSC_VER) /* this is taken from _sre.c */
11#pragma warning(disable: 4710)
12/* fastest possible local call under MSVC */
13#define LOCAL(type) static __inline type __fastcall
14#elif defined(USE_INLINE)
15#define LOCAL(type) static inline type
16#else
17#define LOCAL(type) static type
18#endif
19
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020#ifdef COUNT_ALLOCS
21int null_strings, one_strings;
22#endif
23
Guido van Rossumc0b618a1997-05-02 03:12:38 +000024static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000025static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026
Guido van Rossum45ec02a2002-08-19 21:43:18 +000027/* This dictionary holds all interned strings. Note that references to
28 strings in this dictionary are *not* counted in the string's ob_refcnt.
29 When the interned string reaches a refcnt of 0 the string deallocation
30 function will delete the reference from this dictionary.
31
Tim Petersae1d0c92006-03-17 03:29:34 +000032 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000033 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
34*/
35static PyObject *interned;
36
37
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000038/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000039 For both PyString_FromString() and PyString_FromStringAndSize(), the
40 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000041 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000042
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000043 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000044 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000045
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000046 For PyString_FromStringAndSize(), the parameter `str' is
47 either NULL or else points to a string containing at least `size' bytes.
48 For PyString_FromStringAndSize(), the string in the `str' parameter does
49 not have to be null-terminated. (Therefore it is safe to construct a
50 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
51 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
52 bytes (setting the last byte to the null terminating character) and you can
53 fill in the data yourself. If `str' is non-NULL then the resulting
54 PyString object must be treated as immutable and you must not fill in nor
55 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000056
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000057 The PyObject member `op->ob_size', which denotes the number of "extra
58 items" in a variable-size object, will contain the number of bytes
59 allocated for string data, not counting the null terminating character. It
60 is therefore equal to the `size' parameter (for
61 PyString_FromStringAndSize()) or the length of the string in the `str'
62 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000065PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000066{
Tim Peters9e897f42001-05-09 07:37:07 +000067 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000068 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 Py_INCREF(op);
74 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000085
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000086 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000087 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000088 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000090 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000092 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (str != NULL)
94 memcpy(op->ob_sval, str, size);
95 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000096 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000104 PyObject *t = (PyObject *)op;
105 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000106 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111}
112
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000114PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115{
Tim Peters62de65b2001-12-06 20:29:32 +0000116 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000117 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000118
119 assert(str != NULL);
120 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000121 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000122 PyErr_SetString(PyExc_OverflowError,
123 "string is too long for a Python string");
124 return NULL;
125 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 if (size == 0 && (op = nullstring) != NULL) {
127#ifdef COUNT_ALLOCS
128 null_strings++;
129#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 Py_INCREF(op);
131 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 }
133 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
134#ifdef COUNT_ALLOCS
135 one_strings++;
136#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000137 Py_INCREF(op);
138 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000140
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000141 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000142 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000143 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000147 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000148 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000149 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000157 PyObject *t = (PyObject *)op;
158 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000159 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000161 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000162 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000164}
165
Barry Warsawdadace02001-08-24 18:32:06 +0000166PyObject *
167PyString_FromFormatV(const char *format, va_list vargs)
168{
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000170 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000171 const char* f;
172 char *s;
173 PyObject* string;
174
Tim Petersc15c4f12001-10-02 21:32:07 +0000175#ifdef VA_LIST_IS_ARRAY
176 memcpy(count, vargs, sizeof(va_list));
177#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000178#ifdef __va_copy
179 __va_copy(count, vargs);
180#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000181 count = vargs;
182#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000183#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
188 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
189 ;
190
Tim Peters8931ff12006-05-13 23:28:20 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000196 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000197
Barry Warsawdadace02001-08-24 18:32:06 +0000198 switch (*f) {
199 case 'c':
200 (void)va_arg(count, int);
201 /* fall through... */
202 case '%':
203 n++;
204 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000205 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000206 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 /* 20 bytes is enough to hold a 64-bit
208 integer. Decimal takes the most space.
209 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000210 n += 20;
211 break;
212 case 's':
213 s = va_arg(count, char*);
214 n += strlen(s);
215 break;
216 case 'p':
217 (void) va_arg(count, int);
218 /* maximum 64-bit pointer representation:
219 * 0xffffffffffffffff
220 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000222 */
223 n += 19;
224 break;
225 default:
226 /* if we stumble upon an unknown
227 formatting code, copy the rest of
228 the format string to the output
229 string. (we cannot just skip the
230 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 n += strlen(p);
233 goto expand;
234 }
235 } else
236 n++;
237 }
238 expand:
239 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000240 /* Since we've analyzed how much space we need for the worst case,
241 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000242 string = PyString_FromStringAndSize(NULL, n);
243 if (!string)
244 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000245
Barry Warsawdadace02001-08-24 18:32:06 +0000246 s = PyString_AsString(string);
247
248 for (f = format; *f; f++) {
249 if (*f == '%') {
250 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000251 Py_ssize_t i;
252 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000253 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000254 /* parse the width.precision part (we're only
255 interested in the precision value, if any) */
256 n = 0;
257 while (isdigit(Py_CHARMASK(*f)))
258 n = (n*10) + *f++ - '0';
259 if (*f == '.') {
260 f++;
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 }
265 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
266 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000267 /* handle the long flag, but only for %ld and %lu.
268 others can be added when necessary. */
269 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000270 longflag = 1;
271 ++f;
272 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000273 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000274 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000275 size_tflag = 1;
276 ++f;
277 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000278
Barry Warsawdadace02001-08-24 18:32:06 +0000279 switch (*f) {
280 case 'c':
281 *s++ = va_arg(vargs, int);
282 break;
283 case 'd':
284 if (longflag)
285 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000286 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
288 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
290 sprintf(s, "%d", va_arg(vargs, int));
291 s += strlen(s);
292 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000293 case 'u':
294 if (longflag)
295 sprintf(s, "%lu",
296 va_arg(vargs, unsigned long));
297 else if (size_tflag)
298 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
299 va_arg(vargs, size_t));
300 else
301 sprintf(s, "%u",
302 va_arg(vargs, unsigned int));
303 s += strlen(s);
304 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000305 case 'i':
306 sprintf(s, "%i", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 'x':
310 sprintf(s, "%x", va_arg(vargs, int));
311 s += strlen(s);
312 break;
313 case 's':
314 p = va_arg(vargs, char*);
315 i = strlen(p);
316 if (n > 0 && i > n)
317 i = n;
318 memcpy(s, p, i);
319 s += i;
320 break;
321 case 'p':
322 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000323 /* %p is ill-defined: ensure leading 0x. */
324 if (s[1] == 'X')
325 s[1] = 'x';
326 else if (s[1] != 'x') {
327 memmove(s+2, s, strlen(s)+1);
328 s[0] = '0';
329 s[1] = 'x';
330 }
Barry Warsawdadace02001-08-24 18:32:06 +0000331 s += strlen(s);
332 break;
333 case '%':
334 *s++ = '%';
335 break;
336 default:
337 strcpy(s, p);
338 s += strlen(s);
339 goto end;
340 }
341 } else
342 *s++ = *f;
343 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000344
Barry Warsawdadace02001-08-24 18:32:06 +0000345 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000347 return string;
348}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000349
Barry Warsawdadace02001-08-24 18:32:06 +0000350PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000351PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000352{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000353 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000354 va_list vargs;
355
356#ifdef HAVE_STDARG_PROTOTYPES
357 va_start(vargs, format);
358#else
359 va_start(vargs);
360#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000361 ret = PyString_FromFormatV(format, vargs);
362 va_end(vargs);
363 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000364}
365
366
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000368 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 const char *encoding,
370 const char *errors)
371{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372 PyObject *v, *str;
373
374 str = PyString_FromStringAndSize(s, size);
375 if (str == NULL)
376 return NULL;
377 v = PyString_AsDecodedString(str, encoding, errors);
378 Py_DECREF(str);
379 return v;
380}
381
382PyObject *PyString_AsDecodedObject(PyObject *str,
383 const char *encoding,
384 const char *errors)
385{
386 PyObject *v;
387
388 if (!PyString_Check(str)) {
389 PyErr_BadArgument();
390 goto onError;
391 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393 if (encoding == NULL) {
394#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#else
397 PyErr_SetString(PyExc_ValueError, "no encoding specified");
398 goto onError;
399#endif
400 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000401
402 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 v = PyCodec_Decode(str, encoding, errors);
404 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000405 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000406
407 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000410 return NULL;
411}
412
413PyObject *PyString_AsDecodedString(PyObject *str,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v;
418
419 v = PyString_AsDecodedObject(str, encoding, errors);
420 if (v == NULL)
421 goto onError;
422
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000423#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000424 /* Convert Unicode to a string using the default encoding */
425 if (PyUnicode_Check(v)) {
426 PyObject *temp = v;
427 v = PyUnicode_AsEncodedString(v, NULL, NULL);
428 Py_DECREF(temp);
429 if (v == NULL)
430 goto onError;
431 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000433 if (!PyString_Check(v)) {
434 PyErr_Format(PyExc_TypeError,
435 "decoder did not return a string object (type=%.400s)",
436 v->ob_type->tp_name);
437 Py_DECREF(v);
438 goto onError;
439 }
440
441 return v;
442
443 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 return NULL;
445}
446
447PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000448 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000453
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000454 str = PyString_FromStringAndSize(s, size);
455 if (str == NULL)
456 return NULL;
457 v = PyString_AsEncodedString(str, encoding, errors);
458 Py_DECREF(str);
459 return v;
460}
461
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 const char *encoding,
464 const char *errors)
465{
466 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000467
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(str)) {
469 PyErr_BadArgument();
470 goto onError;
471 }
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473 if (encoding == NULL) {
474#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#else
477 PyErr_SetString(PyExc_ValueError, "no encoding specified");
478 goto onError;
479#endif
480 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000481
482 /* Encode via the codec registry */
483 v = PyCodec_Encode(str, encoding, errors);
484 if (v == NULL)
485 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
487 return v;
488
489 onError:
490 return NULL;
491}
492
493PyObject *PyString_AsEncodedString(PyObject *str,
494 const char *encoding,
495 const char *errors)
496{
497 PyObject *v;
498
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000499 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000500 if (v == NULL)
501 goto onError;
502
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000503#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000504 /* Convert Unicode to a string using the default encoding */
505 if (PyUnicode_Check(v)) {
506 PyObject *temp = v;
507 v = PyUnicode_AsEncodedString(v, NULL, NULL);
508 Py_DECREF(temp);
509 if (v == NULL)
510 goto onError;
511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000512#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 if (!PyString_Check(v)) {
514 PyErr_Format(PyExc_TypeError,
515 "encoder did not return a string object (type=%.400s)",
516 v->ob_type->tp_name);
517 Py_DECREF(v);
518 goto onError;
519 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000520
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000521 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000522
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000523 onError:
524 return NULL;
525}
526
Guido van Rossum234f9421993-06-17 12:35:49 +0000527static void
Fred Drakeba096332000-07-09 07:04:36 +0000528string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000529{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000530 switch (PyString_CHECK_INTERNED(op)) {
531 case SSTATE_NOT_INTERNED:
532 break;
533
534 case SSTATE_INTERNED_MORTAL:
535 /* revive dead object temporarily for DelItem */
536 op->ob_refcnt = 3;
537 if (PyDict_DelItem(interned, op) != 0)
538 Py_FatalError(
539 "deletion of interned string failed");
540 break;
541
542 case SSTATE_INTERNED_IMMORTAL:
543 Py_FatalError("Immortal interned string died.");
544
545 default:
546 Py_FatalError("Inconsistent interned string state.");
547 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000548 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000549}
550
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000551/* Unescape a backslash-escaped string. If unicode is non-zero,
552 the string is a u-literal. If recode_encoding is non-zero,
553 the string is UTF-8 encoded and should be re-encoded in the
554 specified encoding. */
555
556PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000557 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000559 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000560 const char *recode_encoding)
561{
562 int c;
563 char *p, *buf;
564 const char *end;
565 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000566 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000567 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568 if (v == NULL)
569 return NULL;
570 p = buf = PyString_AsString(v);
571 end = s + len;
572 while (s < end) {
573 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000574 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575#ifdef Py_USING_UNICODE
576 if (recode_encoding && (*s & 0x80)) {
577 PyObject *u, *w;
578 char *r;
579 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000580 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 t = s;
582 /* Decode non-ASCII bytes as UTF-8. */
583 while (t < end && (*t & 0x80)) t++;
584 u = PyUnicode_DecodeUTF8(s, t - s, errors);
585 if(!u) goto failed;
586
587 /* Recode them in target encoding. */
588 w = PyUnicode_AsEncodedString(
589 u, recode_encoding, errors);
590 Py_DECREF(u);
591 if (!w) goto failed;
592
593 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000594 assert(PyString_Check(w));
595 r = PyString_AS_STRING(w);
596 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000597 memcpy(p, r, rn);
598 p += rn;
599 Py_DECREF(w);
600 s = t;
601 } else {
602 *p++ = *s++;
603 }
604#else
605 *p++ = *s++;
606#endif
607 continue;
608 }
609 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000610 if (s==end) {
611 PyErr_SetString(PyExc_ValueError,
612 "Trailing \\ in string");
613 goto failed;
614 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000615 switch (*s++) {
616 /* XXX This assumes ASCII! */
617 case '\n': break;
618 case '\\': *p++ = '\\'; break;
619 case '\'': *p++ = '\''; break;
620 case '\"': *p++ = '\"'; break;
621 case 'b': *p++ = '\b'; break;
622 case 'f': *p++ = '\014'; break; /* FF */
623 case 't': *p++ = '\t'; break;
624 case 'n': *p++ = '\n'; break;
625 case 'r': *p++ = '\r'; break;
626 case 'v': *p++ = '\013'; break; /* VT */
627 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
628 case '0': case '1': case '2': case '3':
629 case '4': case '5': case '6': case '7':
630 c = s[-1] - '0';
631 if ('0' <= *s && *s <= '7') {
632 c = (c<<3) + *s++ - '0';
633 if ('0' <= *s && *s <= '7')
634 c = (c<<3) + *s++ - '0';
635 }
636 *p++ = c;
637 break;
638 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000639 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 && isxdigit(Py_CHARMASK(s[1]))) {
641 unsigned int x = 0;
642 c = Py_CHARMASK(*s);
643 s++;
644 if (isdigit(c))
645 x = c - '0';
646 else if (islower(c))
647 x = 10 + c - 'a';
648 else
649 x = 10 + c - 'A';
650 x = x << 4;
651 c = Py_CHARMASK(*s);
652 s++;
653 if (isdigit(c))
654 x += c - '0';
655 else if (islower(c))
656 x += 10 + c - 'a';
657 else
658 x += 10 + c - 'A';
659 *p++ = x;
660 break;
661 }
662 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000663 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000664 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667 if (strcmp(errors, "replace") == 0) {
668 *p++ = '?';
669 } else if (strcmp(errors, "ignore") == 0)
670 /* do nothing */;
671 else {
672 PyErr_Format(PyExc_ValueError,
673 "decoding error; "
674 "unknown error handling code: %.400s",
675 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#ifndef Py_USING_UNICODE
679 case 'u':
680 case 'U':
681 case 'N':
682 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000683 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 "Unicode escapes not legal "
685 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000686 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 }
688#endif
689 default:
690 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000691 s--;
692 goto non_esc; /* an arbitry number of unescaped
693 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 }
695 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000696 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000698 return v;
699 failed:
700 Py_DECREF(v);
701 return NULL;
702}
703
Martin v. Löwis18e16552006-02-15 17:27:45 +0000704static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705string_getsize(register PyObject *op)
706{
707 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000708 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709 if (PyString_AsStringAndSize(op, &s, &len))
710 return -1;
711 return len;
712}
713
714static /*const*/ char *
715string_getbuffer(register PyObject *op)
716{
717 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000718 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
720 return NULL;
721 return s;
722}
723
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000725PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 if (!PyString_Check(op))
728 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730}
731
732/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000733PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000734{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735 if (!PyString_Check(op))
736 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738}
739
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740int
741PyString_AsStringAndSize(register PyObject *obj,
742 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000743 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000744{
745 if (s == NULL) {
746 PyErr_BadInternalCall();
747 return -1;
748 }
749
750 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000752 if (PyUnicode_Check(obj)) {
753 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
754 if (obj == NULL)
755 return -1;
756 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000757 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000758#endif
759 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_Format(PyExc_TypeError,
761 "expected string or Unicode object, "
762 "%.200s found", obj->ob_type->tp_name);
763 return -1;
764 }
765 }
766
767 *s = PyString_AS_STRING(obj);
768 if (len != NULL)
769 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000770 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000771 PyErr_SetString(PyExc_TypeError,
772 "expected string without null bytes");
773 return -1;
774 }
775 return 0;
776}
777
Fredrik Lundhaf722372006-05-25 17:55:31 +0000778/* -------------------------------------------------------------------- */
779/* Helpers */
780
781#define USE_FAST /* experimental fast search implementation */
782
783/* XXX - this code is copied from unicodeobject.c. we really should
784 refactor the core implementations (see _sre.c for how this can be
785 done), but that'll have to wait -- fredrik */
786
787/* fast search/count implementation, based on a mix between boyer-
788 moore and horspool, with a few more bells and whistles on the top.
789 for some more background, see: http://effbot.org/stringlib */
790
791/* note: fastsearch may access s[n], which isn't a problem when using
792 Python's ordinary string types, but may cause problems if you're
793 using this code in other contexts. also, the count mode returns -1
794 if there cannot possibly be a match in the target string, and 0 if
795 it has actually checked for matches, but didn't find any. callers
796 beware! */
797
798#define FAST_COUNT 0
799#define FAST_SEARCH 1
800
801LOCAL(Py_ssize_t)
802 fastsearch(const unsigned char* s, Py_ssize_t n, const unsigned char* p,
803 Py_ssize_t m, int mode)
804{
805 long mask;
806 int skip, count = 0;
807 Py_ssize_t i, j, mlast, w;
808
809 w = n - m;
810
811 if (w < 0)
812 return -1;
813
814 /* look for special cases */
815 if (m <= 1) {
816 if (m <= 0)
817 return -1;
818 /* use special case for 1-character strings */
819 if (mode == FAST_COUNT) {
820 for (i = 0; i < n; i++)
821 if (s[i] == p[0])
822 count++;
823 return count;
824 } else {
825 for (i = 0; i < n; i++)
826 if (s[i] == p[0])
827 return i;
828 }
829 return -1;
830 }
831
832 mlast = m - 1;
833
834 /* create compressed boyer-moore delta 1 table */
835 skip = mlast - 1;
836 /* process pattern[:-1] */
837 for (mask = i = 0; i < mlast; i++) {
838 mask |= (1 << (p[i] & 0x1F));
839 if (p[i] == p[mlast])
840 skip = mlast - i - 1;
841 }
842 /* process pattern[-1] outside the loop */
843 mask |= (1 << (p[mlast] & 0x1F));
844
845 for (i = 0; i <= w; i++) {
846 /* note: using mlast in the skip path slows things down on x86 */
847 if (s[i+m-1] == p[m-1]) {
848 /* candidate match */
849 for (j = 0; j < mlast; j++)
850 if (s[i+j] != p[j])
851 break;
852 if (j == mlast) {
853 /* got a match! */
854 if (mode != FAST_COUNT)
855 return i;
856 count++;
857 i = i + mlast;
858 continue;
859 }
860 /* miss: check if next character is part of pattern */
861 if (!(mask & (1 << (s[i+m] & 0x1F))))
862 i = i + m;
863 else {
864 i = i + skip;
865 continue;
866 }
867 } else {
868 /* skip: check if next character is part of pattern */
869 if (!(mask & (1 << (s[i+m] & 0x1F))))
870 i = i + m;
871 }
872 }
873
874 if (mode != FAST_COUNT)
875 return -1;
876 return count;
877}
878
879/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880/* Methods */
881
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000882static int
Fred Drakeba096332000-07-09 07:04:36 +0000883string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000885 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000888
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000889 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000890 if (! PyString_CheckExact(op)) {
891 int ret;
892 /* A str subclass may have its own __str__ method. */
893 op = (PyStringObject *) PyObject_Str((PyObject *)op);
894 if (op == NULL)
895 return -1;
896 ret = string_print(op, fp, flags);
897 Py_DECREF(op);
898 return ret;
899 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000901#ifdef __VMS
902 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
903#else
904 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
905#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000906 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000908
Thomas Wouters7e474022000-07-16 12:04:32 +0000909 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000910 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000911 if (memchr(op->ob_sval, '\'', op->ob_size) &&
912 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000913 quote = '"';
914
915 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 for (i = 0; i < op->ob_size; i++) {
917 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000918 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000919 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000920 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000921 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000922 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000923 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000924 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000925 fprintf(fp, "\\r");
926 else if (c < ' ' || c >= 0x7f)
927 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000928 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000929 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000931 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000932 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933}
934
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000935PyObject *
936PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000938 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000939 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000940 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000941 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000942 PyErr_SetString(PyExc_OverflowError,
943 "string is too large to make repr");
944 }
945 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000947 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 }
949 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000950 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951 register char c;
952 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000953 int quote;
954
Thomas Wouters7e474022000-07-16 12:04:32 +0000955 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000956 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000957 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000958 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000959 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000960 quote = '"';
961
Tim Peters9161c8b2001-12-03 01:55:38 +0000962 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000963 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000965 /* There's at least enough room for a hex escape
966 and a closing quote. */
967 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000969 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000971 else if (c == '\t')
972 *p++ = '\\', *p++ = 't';
973 else if (c == '\n')
974 *p++ = '\\', *p++ = 'n';
975 else if (c == '\r')
976 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000977 else if (c < ' ' || c >= 0x7f) {
978 /* For performance, we don't want to call
979 PyOS_snprintf here (extra layers of
980 function call). */
981 sprintf(p, "\\x%02x", c & 0xff);
982 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000983 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000984 else
985 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000987 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000988 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000990 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000991 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000992 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994}
995
Guido van Rossum189f1df2001-05-01 16:51:53 +0000996static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000997string_repr(PyObject *op)
998{
999 return PyString_Repr(op, 1);
1000}
1001
1002static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +00001003string_str(PyObject *s)
1004{
Tim Petersc9933152001-10-16 20:18:24 +00001005 assert(PyString_Check(s));
1006 if (PyString_CheckExact(s)) {
1007 Py_INCREF(s);
1008 return s;
1009 }
1010 else {
1011 /* Subtype -- return genuine string with the same value. */
1012 PyStringObject *t = (PyStringObject *) s;
1013 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
1014 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001015}
1016
Martin v. Löwis18e16552006-02-15 17:27:45 +00001017static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001018string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001019{
1020 return a->ob_size;
1021}
1022
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001024string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001026 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 register PyStringObject *op;
1028 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001029#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001030 if (PyUnicode_Check(bb))
1031 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001032#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001033 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +00001034 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +00001035 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036 return NULL;
1037 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001039 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +00001040 if ((a->ob_size == 0 || b->ob_size == 0) &&
1041 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1042 if (a->ob_size == 0) {
1043 Py_INCREF(bb);
1044 return bb;
1045 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 Py_INCREF(a);
1047 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 }
1049 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001050 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001051 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001052 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001053 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001054 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001055 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001056 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001057 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001058 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1059 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001060 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001061 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001062#undef b
1063}
1064
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001065static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001066string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001068 register Py_ssize_t i;
1069 register Py_ssize_t j;
1070 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001072 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073 if (n < 0)
1074 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001075 /* watch out for overflows: the size can overflow int,
1076 * and the # of bytes needed can overflow size_t
1077 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001079 if (n && size / n != a->ob_size) {
1080 PyErr_SetString(PyExc_OverflowError,
1081 "repeated string is too long");
1082 return NULL;
1083 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001084 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001085 Py_INCREF(a);
1086 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001087 }
Tim Peterse7c05322004-06-27 17:24:49 +00001088 nbytes = (size_t)size;
1089 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001090 PyErr_SetString(PyExc_OverflowError,
1091 "repeated string is too long");
1092 return NULL;
1093 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001094 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001095 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001096 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001097 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001098 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001099 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001100 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001101 op->ob_sval[size] = '\0';
1102 if (a->ob_size == 1 && n > 0) {
1103 memset(op->ob_sval, a->ob_sval[0] , n);
1104 return (PyObject *) op;
1105 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001106 i = 0;
1107 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001108 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1109 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001110 }
1111 while (i < size) {
1112 j = (i <= size-i) ? i : size-i;
1113 memcpy(op->ob_sval+i, op->ob_sval, j);
1114 i += j;
1115 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001116 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001117}
1118
1119/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1120
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001121static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001122string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001123 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001124 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001125{
1126 if (i < 0)
1127 i = 0;
1128 if (j < 0)
1129 j = 0; /* Avoid signed/unsigned bug in next line */
1130 if (j > a->ob_size)
1131 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001132 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1133 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001134 Py_INCREF(a);
1135 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001136 }
1137 if (j < i)
1138 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001139 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001140}
1141
Guido van Rossum9284a572000-03-07 15:53:43 +00001142static int
Fred Drakeba096332000-07-09 07:04:36 +00001143string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001144{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001145 char *s = PyString_AS_STRING(a);
1146 const char *sub = PyString_AS_STRING(el);
1147 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001148 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001149 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001150 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001151
1152 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001153#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001154 if (PyUnicode_Check(el))
1155 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001156#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001157 if (!PyString_Check(el)) {
1158 PyErr_SetString(PyExc_TypeError,
1159 "'in <string>' requires string as left operand");
1160 return -1;
1161 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001162 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001163
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001164 if (len_sub == 0)
1165 return 1;
Tim Petersae1d0c92006-03-17 03:29:34 +00001166 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001167 substring. When s<last, there is still room for a possible match
1168 and s[0] through s[len_sub-1] will be in bounds.
1169 shortsub is len_sub minus the last character which is checked
1170 separately just before the memcmp(). That check helps prevent
1171 false starts and saves the setup time for memcmp().
1172 */
1173 firstchar = sub[0];
1174 shortsub = len_sub - 1;
1175 lastchar = sub[shortsub];
1176 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1177 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001178 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001179 if (s == NULL)
1180 return 0;
1181 assert(s < last);
1182 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001183 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001184 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001185 }
1186 return 0;
1187}
1188
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001189static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001190string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001191{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001192 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001193 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001194 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001195 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001196 return NULL;
1197 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001198 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001199 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001200 if (v == NULL)
1201 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001202 else {
1203#ifdef COUNT_ALLOCS
1204 one_strings++;
1205#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001206 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001207 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001208 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001209}
1210
Martin v. Löwiscd353062001-05-24 16:56:35 +00001211static PyObject*
1212string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001213{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001214 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001215 Py_ssize_t len_a, len_b;
1216 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001217 PyObject *result;
1218
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001219 /* Make sure both arguments are strings. */
1220 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001221 result = Py_NotImplemented;
1222 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001223 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001224 if (a == b) {
1225 switch (op) {
1226 case Py_EQ:case Py_LE:case Py_GE:
1227 result = Py_True;
1228 goto out;
1229 case Py_NE:case Py_LT:case Py_GT:
1230 result = Py_False;
1231 goto out;
1232 }
1233 }
1234 if (op == Py_EQ) {
1235 /* Supporting Py_NE here as well does not save
1236 much time, since Py_NE is rarely used. */
1237 if (a->ob_size == b->ob_size
1238 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001239 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001240 a->ob_size) == 0)) {
1241 result = Py_True;
1242 } else {
1243 result = Py_False;
1244 }
1245 goto out;
1246 }
1247 len_a = a->ob_size; len_b = b->ob_size;
1248 min_len = (len_a < len_b) ? len_a : len_b;
1249 if (min_len > 0) {
1250 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1251 if (c==0)
1252 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1253 }else
1254 c = 0;
1255 if (c == 0)
1256 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1257 switch (op) {
1258 case Py_LT: c = c < 0; break;
1259 case Py_LE: c = c <= 0; break;
1260 case Py_EQ: assert(0); break; /* unreachable */
1261 case Py_NE: c = c != 0; break;
1262 case Py_GT: c = c > 0; break;
1263 case Py_GE: c = c >= 0; break;
1264 default:
1265 result = Py_NotImplemented;
1266 goto out;
1267 }
1268 result = c ? Py_True : Py_False;
1269 out:
1270 Py_INCREF(result);
1271 return result;
1272}
1273
1274int
1275_PyString_Eq(PyObject *o1, PyObject *o2)
1276{
1277 PyStringObject *a, *b;
1278 a = (PyStringObject*)o1;
1279 b = (PyStringObject*)o2;
1280 return a->ob_size == b->ob_size
1281 && *a->ob_sval == *b->ob_sval
1282 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001283}
1284
Guido van Rossum9bfef441993-03-29 10:43:31 +00001285static long
Fred Drakeba096332000-07-09 07:04:36 +00001286string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001287{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001288 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001289 register unsigned char *p;
1290 register long x;
1291
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001292 if (a->ob_shash != -1)
1293 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001294 len = a->ob_size;
1295 p = (unsigned char *) a->ob_sval;
1296 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001297 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001298 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001299 x ^= a->ob_size;
1300 if (x == -1)
1301 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001302 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001303 return x;
1304}
1305
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001306#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1307
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001308static PyObject*
1309string_subscript(PyStringObject* self, PyObject* item)
1310{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001311 PyNumberMethods *nb = item->ob_type->tp_as_number;
1312 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1313 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001314 if (i == -1 && PyErr_Occurred())
1315 return NULL;
1316 if (i < 0)
1317 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001318 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001319 }
1320 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001321 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001322 char* source_buf;
1323 char* result_buf;
1324 PyObject* result;
1325
Tim Petersae1d0c92006-03-17 03:29:34 +00001326 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001327 PyString_GET_SIZE(self),
1328 &start, &stop, &step, &slicelength) < 0) {
1329 return NULL;
1330 }
1331
1332 if (slicelength <= 0) {
1333 return PyString_FromStringAndSize("", 0);
1334 }
1335 else {
1336 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001337 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001338 if (result_buf == NULL)
1339 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001340
Tim Petersae1d0c92006-03-17 03:29:34 +00001341 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001342 cur += step, i++) {
1343 result_buf[i] = source_buf[cur];
1344 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001345
1346 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001347 slicelength);
1348 PyMem_Free(result_buf);
1349 return result;
1350 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001351 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001352 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001353 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001354 "string indices must be integers");
1355 return NULL;
1356 }
1357}
1358
Martin v. Löwis18e16552006-02-15 17:27:45 +00001359static Py_ssize_t
1360string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001361{
1362 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001363 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001364 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001365 return -1;
1366 }
1367 *ptr = (void *)self->ob_sval;
1368 return self->ob_size;
1369}
1370
Martin v. Löwis18e16552006-02-15 17:27:45 +00001371static Py_ssize_t
1372string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001373{
Guido van Rossum045e6881997-09-08 18:30:11 +00001374 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001375 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001376 return -1;
1377}
1378
Martin v. Löwis18e16552006-02-15 17:27:45 +00001379static Py_ssize_t
1380string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001381{
1382 if ( lenp )
1383 *lenp = self->ob_size;
1384 return 1;
1385}
1386
Martin v. Löwis18e16552006-02-15 17:27:45 +00001387static Py_ssize_t
1388string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001389{
1390 if ( index != 0 ) {
1391 PyErr_SetString(PyExc_SystemError,
1392 "accessing non-existent string segment");
1393 return -1;
1394 }
1395 *ptr = self->ob_sval;
1396 return self->ob_size;
1397}
1398
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001399static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001400 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001401 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001402 (ssizeargfunc)string_repeat, /*sq_repeat*/
1403 (ssizeargfunc)string_item, /*sq_item*/
1404 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001405 0, /*sq_ass_item*/
1406 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001407 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001408};
1409
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001410static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001411 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001412 (binaryfunc)string_subscript,
1413 0,
1414};
1415
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001416static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001417 (readbufferproc)string_buffer_getreadbuf,
1418 (writebufferproc)string_buffer_getwritebuf,
1419 (segcountproc)string_buffer_getsegcount,
1420 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001421};
1422
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423
1424
/* Strip modes; they double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* The method name is the format string minus its 3-character "|O:"
   prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001433
/* Helpers for the split routines.  Both expect the expanding function
   to provide `PyObject *str`, `PyObject *list` and an `onError:`
   label.  Each builds a string from data[left:right], adds it to list
   (at the end for SPLIT_APPEND, at index 0 for SPLIT_INSERT), drops
   the local reference, and jumps to onError if either step fails.
   They expand to several statements, so use them only as a complete
   statement inside braces. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) == 0)                          \
        Py_DECREF(str);                                         \
    else {                                                      \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }

#define SPLIT_INSERT(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Insert(list, 0, str) == 0)                       \
        Py_DECREF(str);                                         \
    else {                                                      \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457
1458static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001459split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001461 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463 PyObject *list = PyList_New(0);
1464
1465 if (list == NULL)
1466 return NULL;
1467
Guido van Rossum4c08d552000-03-10 22:55:18 +00001468 for (i = j = 0; i < len; ) {
1469 while (i < len && isspace(Py_CHARMASK(s[i])))
1470 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 while (i < len && !isspace(Py_CHARMASK(s[i])))
1473 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001475 if (maxsplit-- <= 0)
1476 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001478 while (i < len && isspace(Py_CHARMASK(s[i])))
1479 i++;
1480 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 }
1482 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001484 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001485 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001487 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 Py_DECREF(list);
1489 return NULL;
1490}
1491
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001493split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001494{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001495 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001496 PyObject *str;
1497 PyObject *list = PyList_New(0);
1498
1499 if (list == NULL)
1500 return NULL;
1501
1502 for (i = j = 0; i < len; ) {
1503 if (s[i] == ch) {
1504 if (maxcount-- <= 0)
1505 break;
1506 SPLIT_APPEND(s, j, i);
1507 i = j = i + 1;
1508 } else
1509 i++;
1510 }
1511 if (j <= len) {
1512 SPLIT_APPEND(s, j, len);
1513 }
1514 return list;
1515
1516 onError:
1517 Py_DECREF(list);
1518 return NULL;
1519}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001521PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522"S.split([sep [,maxsplit]]) -> list of strings\n\
1523\n\
1524Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001525delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001526splits are done. If sep is not specified or is None, any\n\
1527whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528
1529static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001530string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001532 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1533 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001534 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 const char *s = PyString_AS_STRING(self), *sub;
1536 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001537
Martin v. Löwis9c830762006-04-13 08:37:17 +00001538 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001540 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001541 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001542 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001543 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001544 if (PyString_Check(subobj)) {
1545 sub = PyString_AS_STRING(subobj);
1546 n = PyString_GET_SIZE(subobj);
1547 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001548#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 else if (PyUnicode_Check(subobj))
1550 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001551#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1553 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001554
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555 if (n == 0) {
1556 PyErr_SetString(PyExc_ValueError, "empty separator");
1557 return NULL;
1558 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001559 else if (n == 1)
1560 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561
1562 list = PyList_New(0);
1563 if (list == NULL)
1564 return NULL;
1565
1566 i = j = 0;
1567 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001568 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 if (maxsplit-- <= 0)
1570 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001571 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572 if (item == NULL)
1573 goto fail;
1574 err = PyList_Append(list, item);
1575 Py_DECREF(item);
1576 if (err < 0)
1577 goto fail;
1578 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579 }
1580 else
1581 i++;
1582 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001583 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001584 if (item == NULL)
1585 goto fail;
1586 err = PyList_Append(list, item);
1587 Py_DECREF(item);
1588 if (err < 0)
1589 goto fail;
1590
1591 return list;
1592
1593 fail:
1594 Py_DECREF(list);
1595 return NULL;
1596}
1597
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001598static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001599rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001600{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001601 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001602 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001603 PyObject *list = PyList_New(0);
1604
1605 if (list == NULL)
1606 return NULL;
1607
1608 for (i = j = len - 1; i >= 0; ) {
1609 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1610 i--;
1611 j = i;
1612 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1613 i--;
1614 if (j > i) {
1615 if (maxsplit-- <= 0)
1616 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001617 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001618 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1619 i--;
1620 j = i;
1621 }
1622 }
1623 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001624 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001625 }
1626 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001627 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001628 Py_DECREF(list);
1629 return NULL;
1630}
1631
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001632static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001633rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001634{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001635 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001636 PyObject *str;
1637 PyObject *list = PyList_New(0);
1638
1639 if (list == NULL)
1640 return NULL;
1641
1642 for (i = j = len - 1; i >= 0; ) {
1643 if (s[i] == ch) {
1644 if (maxcount-- <= 0)
1645 break;
1646 SPLIT_INSERT(s, i + 1, j + 1);
1647 j = i = i - 1;
1648 } else
1649 i--;
1650 }
1651 if (j >= -1) {
1652 SPLIT_INSERT(s, 0, j + 1);
1653 }
1654 return list;
1655
1656 onError:
1657 Py_DECREF(list);
1658 return NULL;
1659}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001660
1661PyDoc_STRVAR(rsplit__doc__,
1662"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1663\n\
1664Return a list of the words in the string S, using sep as the\n\
1665delimiter string, starting at the end of the string and working\n\
1666to the front. If maxsplit is given, at most maxsplit splits are\n\
1667done. If sep is not specified or is None, any whitespace string\n\
1668is a separator.");
1669
1670static PyObject *
1671string_rsplit(PyStringObject *self, PyObject *args)
1672{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001673 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1674 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001675 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676 const char *s = PyString_AS_STRING(self), *sub;
1677 PyObject *list, *item, *subobj = Py_None;
1678
Martin v. Löwis9c830762006-04-13 08:37:17 +00001679 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001680 return NULL;
1681 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001682 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001683 if (subobj == Py_None)
1684 return rsplit_whitespace(s, len, maxsplit);
1685 if (PyString_Check(subobj)) {
1686 sub = PyString_AS_STRING(subobj);
1687 n = PyString_GET_SIZE(subobj);
1688 }
1689#ifdef Py_USING_UNICODE
1690 else if (PyUnicode_Check(subobj))
1691 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1692#endif
1693 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1694 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001695
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001696 if (n == 0) {
1697 PyErr_SetString(PyExc_ValueError, "empty separator");
1698 return NULL;
1699 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001700 else if (n == 1)
1701 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702
1703 list = PyList_New(0);
1704 if (list == NULL)
1705 return NULL;
1706
1707 j = len;
1708 i = j - n;
1709 while (i >= 0) {
1710 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1711 if (maxsplit-- <= 0)
1712 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001713 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 if (item == NULL)
1715 goto fail;
1716 err = PyList_Insert(list, 0, item);
1717 Py_DECREF(item);
1718 if (err < 0)
1719 goto fail;
1720 j = i;
1721 i -= n;
1722 }
1723 else
1724 i--;
1725 }
1726 item = PyString_FromStringAndSize(s, j);
1727 if (item == NULL)
1728 goto fail;
1729 err = PyList_Insert(list, 0, item);
1730 Py_DECREF(item);
1731 if (err < 0)
1732 goto fail;
1733
1734 return list;
1735
1736 fail:
1737 Py_DECREF(list);
1738 return NULL;
1739}
1740
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001742PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743"S.join(sequence) -> string\n\
1744\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001746sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747
1748static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001749string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750{
1751 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001752 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001753 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001755 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001756 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001757 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001758 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759
Tim Peters19fe14e2001-01-19 03:03:47 +00001760 seq = PySequence_Fast(orig, "");
1761 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001762 return NULL;
1763 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001764
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001765 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001766 if (seqlen == 0) {
1767 Py_DECREF(seq);
1768 return PyString_FromString("");
1769 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001771 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001772 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1773 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001775 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001776 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778
Raymond Hettinger674f2412004-08-23 23:23:54 +00001779 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001780 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001781 * Do a pre-pass to figure out the total amount of space we'll
1782 * need (sz), see whether any argument is absurd, and defer to
1783 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001784 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001785 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001786 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001787 item = PySequence_Fast_GET_ITEM(seq, i);
1788 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001789#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001791 /* Defer to Unicode join.
1792 * CAUTION: There's no gurantee that the
1793 * original sequence can be iterated over
1794 * again, so we must pass seq here.
1795 */
1796 PyObject *result;
1797 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001798 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001799 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001800 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001801#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001802 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001803 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001804 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001805 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 Py_DECREF(seq);
1807 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001808 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001809 sz += PyString_GET_SIZE(item);
1810 if (i != 0)
1811 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001812 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001813 PyErr_SetString(PyExc_OverflowError,
1814 "join() is too long for a Python string");
1815 Py_DECREF(seq);
1816 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001818 }
1819
1820 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001821 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 if (res == NULL) {
1823 Py_DECREF(seq);
1824 return NULL;
1825 }
1826
1827 /* Catenate everything. */
1828 p = PyString_AS_STRING(res);
1829 for (i = 0; i < seqlen; ++i) {
1830 size_t n;
1831 item = PySequence_Fast_GET_ITEM(seq, i);
1832 n = PyString_GET_SIZE(item);
1833 memcpy(p, PyString_AS_STRING(item), n);
1834 p += n;
1835 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001836 memcpy(p, sep, seplen);
1837 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001838 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001840
Jeremy Hylton49048292000-07-11 03:28:17 +00001841 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843}
1844
Tim Peters52e155e2001-06-16 05:42:57 +00001845PyObject *
1846_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001847{
Tim Petersa7259592001-06-16 05:11:17 +00001848 assert(sep != NULL && PyString_Check(sep));
1849 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001850 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001851}
1852
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001853static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001854string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001855{
1856 if (*end > len)
1857 *end = len;
1858 else if (*end < 0)
1859 *end += len;
1860 if (*end < 0)
1861 *end = 0;
1862 if (*start < 0)
1863 *start += len;
1864 if (*start < 0)
1865 *start = 0;
1866}
1867
Martin v. Löwis18e16552006-02-15 17:27:45 +00001868static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001869string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001870{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001872 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001873 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875
Martin v. Löwis18e16552006-02-15 17:27:45 +00001876 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001877 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001878 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 return -2;
1880 if (PyString_Check(subobj)) {
1881 sub = PyString_AS_STRING(subobj);
1882 n = PyString_GET_SIZE(subobj);
1883 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001884#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001886 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001887#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889 return -2;
1890
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001891 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892
Guido van Rossum4c08d552000-03-10 22:55:18 +00001893 if (dir > 0) {
1894 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001896 last -= n;
1897 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001898 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001899 return (long)i;
1900 }
1901 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001902 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001903
Guido van Rossum4c08d552000-03-10 22:55:18 +00001904 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001906 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001907 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001908 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001909 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001910
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 return -1;
1912}
1913
1914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001915PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916"S.find(sub [,start [,end]]) -> int\n\
1917\n\
1918Return the lowest index in S where substring sub is found,\n\
1919such that sub is contained within s[start,end]. Optional\n\
1920arguments start and end are interpreted as in slice notation.\n\
1921\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001922Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923
1924static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001925string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928 if (result == -2)
1929 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001930 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931}
1932
1933
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001934PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935"S.index(sub [,start [,end]]) -> int\n\
1936\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001937Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938
1939static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001940string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001942 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943 if (result == -2)
1944 return NULL;
1945 if (result == -1) {
1946 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001947 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948 return NULL;
1949 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001950 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951}
1952
1953
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001954PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955"S.rfind(sub [,start [,end]]) -> int\n\
1956\n\
1957Return the highest index in S where substring sub is found,\n\
1958such that sub is contained within s[start,end]. Optional\n\
1959arguments start and end are interpreted as in slice notation.\n\
1960\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001961Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962
1963static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001964string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967 if (result == -2)
1968 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001969 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970}
1971
1972
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001973PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974"S.rindex(sub [,start [,end]]) -> int\n\
1975\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001976Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977
1978static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001979string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001981 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982 if (result == -2)
1983 return NULL;
1984 if (result == -1) {
1985 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001986 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987 return NULL;
1988 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001989 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990}
1991
1992
1993static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001994do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1995{
1996 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001997 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001999 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2000 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002001
2002 i = 0;
2003 if (striptype != RIGHTSTRIP) {
2004 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2005 i++;
2006 }
2007 }
2008
2009 j = len;
2010 if (striptype != LEFTSTRIP) {
2011 do {
2012 j--;
2013 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2014 j++;
2015 }
2016
2017 if (i == 0 && j == len && PyString_CheckExact(self)) {
2018 Py_INCREF(self);
2019 return (PyObject*)self;
2020 }
2021 else
2022 return PyString_FromStringAndSize(s+i, j-i);
2023}
2024
2025
2026static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002027do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028{
2029 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002030 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032 i = 0;
2033 if (striptype != RIGHTSTRIP) {
2034 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2035 i++;
2036 }
2037 }
2038
2039 j = len;
2040 if (striptype != LEFTSTRIP) {
2041 do {
2042 j--;
2043 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2044 j++;
2045 }
2046
Tim Peters8fa5dd02001-09-12 02:18:30 +00002047 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 Py_INCREF(self);
2049 return (PyObject*)self;
2050 }
2051 else
2052 return PyString_FromStringAndSize(s+i, j-i);
2053}
2054
2055
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002056static PyObject *
2057do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2058{
2059 PyObject *sep = NULL;
2060
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002061 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002062 return NULL;
2063
2064 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002065 if (PyString_Check(sep))
2066 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002067#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002068 else if (PyUnicode_Check(sep)) {
2069 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2070 PyObject *res;
2071 if (uniself==NULL)
2072 return NULL;
2073 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2074 striptype, sep);
2075 Py_DECREF(uniself);
2076 return res;
2077 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002078#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002079 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002080#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002081 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002082#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002083 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002084#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002085 STRIPNAME(striptype));
2086 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002087 }
2088
2089 return do_strip(self, striptype);
2090}
2091
2092
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002093PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002094"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095\n\
2096Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002097whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002098If chars is given and not None, remove characters in chars instead.\n\
2099If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100
2101static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002102string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002104 if (PyTuple_GET_SIZE(args) == 0)
2105 return do_strip(self, BOTHSTRIP); /* Common case */
2106 else
2107 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108}
2109
2110
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002111PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002112"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002114Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002115If chars is given and not None, remove characters in chars instead.\n\
2116If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117
2118static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002119string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002121 if (PyTuple_GET_SIZE(args) == 0)
2122 return do_strip(self, LEFTSTRIP); /* Common case */
2123 else
2124 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125}
2126
2127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002129"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002131Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002132If chars is given and not None, remove characters in chars instead.\n\
2133If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134
2135static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002136string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002138 if (PyTuple_GET_SIZE(args) == 0)
2139 return do_strip(self, RIGHTSTRIP); /* Common case */
2140 else
2141 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142}
2143
2144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146"S.lower() -> string\n\
2147\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002148Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002150/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2151#ifndef _tolower
2152#define _tolower tolower
2153#endif
2154
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002156string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002158 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002159 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002160 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002162 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002163 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002165
2166 s = PyString_AS_STRING(newobj);
2167
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002168 memcpy(s, PyString_AS_STRING(self), n);
2169
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002171 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002172 if (isupper(c))
2173 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002175
Anthony Baxtera6286212006-04-11 07:42:36 +00002176 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177}
2178
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002179PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180"S.upper() -> string\n\
2181\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002182Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002184#ifndef _toupper
2185#define _toupper toupper
2186#endif
2187
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002189string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002191 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002192 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002193 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002195 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002196 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002198
2199 s = PyString_AS_STRING(newobj);
2200
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002201 memcpy(s, PyString_AS_STRING(self), n);
2202
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002204 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002205 if (islower(c))
2206 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002208
Anthony Baxtera6286212006-04-11 07:42:36 +00002209 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210}
2211
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213"S.title() -> string\n\
2214\n\
2215Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002216characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217
2218static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002219string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220{
2221 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002222 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002224 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 newobj = PyString_FromStringAndSize(NULL, n);
2227 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002229 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002230 for (i = 0; i < n; i++) {
2231 int c = Py_CHARMASK(*s++);
2232 if (islower(c)) {
2233 if (!previous_is_cased)
2234 c = toupper(c);
2235 previous_is_cased = 1;
2236 } else if (isupper(c)) {
2237 if (previous_is_cased)
2238 c = tolower(c);
2239 previous_is_cased = 1;
2240 } else
2241 previous_is_cased = 0;
2242 *s_new++ = c;
2243 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002244 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245}
2246
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002247PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248"S.capitalize() -> string\n\
2249\n\
2250Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002251capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252
2253static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002254string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255{
2256 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002257 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002258 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259
Anthony Baxtera6286212006-04-11 07:42:36 +00002260 newobj = PyString_FromStringAndSize(NULL, n);
2261 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002263 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264 if (0 < n) {
2265 int c = Py_CHARMASK(*s++);
2266 if (islower(c))
2267 *s_new = toupper(c);
2268 else
2269 *s_new = c;
2270 s_new++;
2271 }
2272 for (i = 1; i < n; i++) {
2273 int c = Py_CHARMASK(*s++);
2274 if (isupper(c))
2275 *s_new = tolower(c);
2276 else
2277 *s_new = c;
2278 s_new++;
2279 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002280 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281}
2282
2283
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002284PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002285"S.count(sub[, start[, end]]) -> int\n\
2286\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002287Return the number of non-overlapping occurrences of substring sub in\n\
2288string S[start:end]. Optional arguments start and end are interpreted\n\
2289as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290
2291static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002292string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002293{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002294 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002295 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002296 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002297 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299
Guido van Rossumc6821402000-05-08 14:08:05 +00002300 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2301 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002303
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 if (PyString_Check(subobj)) {
2305 sub = PyString_AS_STRING(subobj);
2306 n = PyString_GET_SIZE(subobj);
2307 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002308#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002309 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002310 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002311 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2312 if (count == -1)
2313 return NULL;
2314 else
2315 return PyInt_FromLong((long) count);
2316 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002317#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2319 return NULL;
2320
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002321 string_adjust_indices(&i, &last, len);
2322
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323 m = last + 1 - n;
2324 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002325 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326
Fredrik Lundhaf722372006-05-25 17:55:31 +00002327#ifdef USE_FAST
2328 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2329 if (r < 0)
2330 r = 0; /* no match */
2331#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332 r = 0;
2333 while (i < m) {
Fredrik Lundhaf722372006-05-25 17:55:31 +00002334 const char *t
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335 if (!memcmp(s+i, sub, n)) {
2336 r++;
2337 i += n;
2338 } else {
2339 i++;
2340 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002341 if (i >= m)
2342 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002343 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002344 if (t == NULL)
2345 break;
2346 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002348#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002349 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350}
2351
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002352PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353"S.swapcase() -> string\n\
2354\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002356converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357
2358static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002359string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360{
2361 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002362 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002363 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364
Anthony Baxtera6286212006-04-11 07:42:36 +00002365 newobj = PyString_FromStringAndSize(NULL, n);
2366 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002368 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369 for (i = 0; i < n; i++) {
2370 int c = Py_CHARMASK(*s++);
2371 if (islower(c)) {
2372 *s_new = toupper(c);
2373 }
2374 else if (isupper(c)) {
2375 *s_new = tolower(c);
2376 }
2377 else
2378 *s_new = c;
2379 s_new++;
2380 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002381 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382}
2383
2384
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002385PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386"S.translate(table [,deletechars]) -> string\n\
2387\n\
2388Return a copy of the string S, where all characters occurring\n\
2389in the optional argument deletechars are removed, and the\n\
2390remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002391translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392
2393static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002394string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002395{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396 register char *input, *output;
2397 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002398 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002401 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002402 PyObject *result;
2403 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002405
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002406 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002407 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409
2410 if (PyString_Check(tableobj)) {
2411 table1 = PyString_AS_STRING(tableobj);
2412 tablen = PyString_GET_SIZE(tableobj);
2413 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002414#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002416 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 parameter; instead a mapping to None will cause characters
2418 to be deleted. */
2419 if (delobj != NULL) {
2420 PyErr_SetString(PyExc_TypeError,
2421 "deletions are implemented differently for unicode");
2422 return NULL;
2423 }
2424 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2425 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002426#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429
Martin v. Löwis00b61272002-12-12 20:03:19 +00002430 if (tablen != 256) {
2431 PyErr_SetString(PyExc_ValueError,
2432 "translation table must be 256 characters long");
2433 return NULL;
2434 }
2435
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 if (delobj != NULL) {
2437 if (PyString_Check(delobj)) {
2438 del_table = PyString_AS_STRING(delobj);
2439 dellen = PyString_GET_SIZE(delobj);
2440 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002441#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 else if (PyUnicode_Check(delobj)) {
2443 PyErr_SetString(PyExc_TypeError,
2444 "deletions are implemented differently for unicode");
2445 return NULL;
2446 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002447#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2449 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002450 }
2451 else {
2452 del_table = NULL;
2453 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002454 }
2455
2456 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002457 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002458 result = PyString_FromStringAndSize((char *)NULL, inlen);
2459 if (result == NULL)
2460 return NULL;
2461 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002462 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463
2464 if (dellen == 0) {
2465 /* If no deletions are required, use faster code */
2466 for (i = inlen; --i >= 0; ) {
2467 c = Py_CHARMASK(*input++);
2468 if (Py_CHARMASK((*output++ = table[c])) != c)
2469 changed = 1;
2470 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002471 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472 return result;
2473 Py_DECREF(result);
2474 Py_INCREF(input_obj);
2475 return input_obj;
2476 }
2477
2478 for (i = 0; i < 256; i++)
2479 trans_table[i] = Py_CHARMASK(table[i]);
2480
2481 for (i = 0; i < dellen; i++)
2482 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2483
2484 for (i = inlen; --i >= 0; ) {
2485 c = Py_CHARMASK(*input++);
2486 if (trans_table[c] != -1)
2487 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2488 continue;
2489 changed = 1;
2490 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002491 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492 Py_DECREF(result);
2493 Py_INCREF(input_obj);
2494 return input_obj;
2495 }
2496 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002497 if (inlen > 0)
2498 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002499 return result;
2500}
2501
2502
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Py_STRING_MATCH(target, offset, pattern, length): true when the
   `length` bytes of `target` starting at `offset` equal `pattern`.
   The first and last bytes are compared before falling back to
   memcmp() on the interior.

   Don't call if length < 2: the interior length passed to memcmp()
   would be negative.  Arguments are evaluated more than once, so they
   must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))

/* findchar(target, target_len, c): pointer to the first byte equal to
   c within the first target_len bytes of target, or NULL if absent. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2516
2517/* String ops must return a string. */
2518/* If the object is subclass of string, create a copy */
2519static PyStringObject *
2520return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002522 if (PyString_CheckExact(self)) {
2523 Py_INCREF(self);
2524 return self;
2525 }
2526 return (PyStringObject *)PyString_FromStringAndSize(
2527 PyString_AS_STRING(self),
2528 PyString_GET_SIZE(self));
2529}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002530
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002531static Py_ssize_t
2532countchar(char *target, int target_len, char c)
2533{
2534 Py_ssize_t count=0;
2535 char *start=target;
2536 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002538 while ( (start=findchar(start, end-start, c)) != NULL ) {
2539 count++;
2540 start += 1;
2541 }
2542
2543 return count;
2544}
2545
2546static Py_ssize_t
2547findstring(char *target, Py_ssize_t target_len,
2548 char *pattern, Py_ssize_t pattern_len,
2549 Py_ssize_t start,
2550 Py_ssize_t end,
2551 int direction)
2552{
2553 if (start < 0) {
2554 start += target_len;
2555 if (start < 0)
2556 start = 0;
2557 }
2558 if (end > target_len) {
2559 end = target_len;
2560 } else if (end < 0) {
2561 end += target_len;
2562 if (end < 0)
2563 end = 0;
2564 }
2565
2566 /* zero-length substrings always match at the first attempt */
2567 if (pattern_len == 0)
2568 return (direction > 0) ? start : end;
2569
2570 end -= pattern_len;
2571
2572 if (direction < 0) {
2573 for (; end >= start; end--)
2574 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2575 return end;
2576 } else {
2577 for (; start <= end; start++)
2578 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2579 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002580 }
2581 return -1;
2582}
2583
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002584Py_ssize_t
2585countstring(char *target, Py_ssize_t target_len,
2586 char *pattern, Py_ssize_t pattern_len,
2587 Py_ssize_t start,
2588 Py_ssize_t end,
2589 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002590{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002592
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002593 if (start < 0) {
2594 start += target_len;
2595 if (start < 0)
2596 start = 0;
2597 }
2598 if (end > target_len) {
2599 end = target_len;
2600 } else if (end < 0) {
2601 end += target_len;
2602 if (end < 0)
2603 end = 0;
2604 }
2605
2606 /* zero-length substrings match everywhere */
2607 if (pattern_len == 0)
2608 return target_len+1;
2609
2610 end -= pattern_len;
2611
2612 if (direction < 0) {
2613 for (; end >= start; end--)
2614 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2615 count++;
2616 end -= pattern_len-1;
2617 }
2618 } else {
2619 for (; start <= end; start++)
2620 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2621 count++;
2622 start += pattern_len-1;
2623 }
2624 }
2625 return count;
2626}
2627
2628
/* Algorithms for different cases of string replacement */
2630
2631/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2632static PyStringObject *
2633replace_interleave(PyStringObject *self,
2634 PyStringObject *to,
2635 Py_ssize_t maxcount)
2636{
2637 char *self_s, *to_s, *result_s;
2638 Py_ssize_t self_len, to_len, result_len;
2639 Py_ssize_t count, i, product;
2640 PyStringObject *result;
2641
2642 self_len = PyString_GET_SIZE(self);
2643 to_len = PyString_GET_SIZE(to);
2644
2645 /* 1 at the end plus 1 after every character */
2646 count = self_len+1;
2647 if (maxcount < count)
2648 count = maxcount;
2649
2650 /* Check for overflow */
2651 /* result_len = count * to_len + self_len; */
2652 product = count * to_len;
2653 if (product / to_len != count) {
2654 PyErr_SetString(PyExc_OverflowError,
2655 "replace string is too long");
2656 return NULL;
2657 }
2658 result_len = product + self_len;
2659 if (result_len < 0) {
2660 PyErr_SetString(PyExc_OverflowError,
2661 "replace string is too long");
2662 return NULL;
2663 }
2664
2665 if (! (result = (PyStringObject *)
2666 PyString_FromStringAndSize(NULL, result_len)) )
2667 return NULL;
2668
2669 self_s = PyString_AS_STRING(self);
2670 to_s = PyString_AS_STRING(to);
2671 to_len = PyString_GET_SIZE(to);
2672 result_s = PyString_AS_STRING(result);
2673
2674 /* TODO: special case single character, which doesn't need memcpy */
2675
2676 /* Lay the first one down (guaranteed this will occur) */
2677 memcpy(result_s, to_s, to_len);
2678 result_s += to_len;
2679 count -= 1;
2680
2681 for (i=0; i<count; i++) {
2682 *result_s++ = *self_s++;
2683 memcpy(result_s, to_s, to_len);
2684 result_s += to_len;
2685 }
2686
2687 /* Copy the rest of the original string */
2688 memcpy(result_s, self_s, self_len-i);
2689
2690 return result;
2691}
2692
2693/* Special case for deleting a single character */
2694/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2695static PyStringObject *
2696replace_delete_single_character(PyStringObject *self,
2697 char from_c, Py_ssize_t maxcount)
2698{
2699 char *self_s, *result_s;
2700 char *start, *next, *end;
2701 Py_ssize_t self_len, result_len;
2702 Py_ssize_t count;
2703 PyStringObject *result;
2704
2705 self_len = PyString_GET_SIZE(self);
2706 self_s = PyString_AS_STRING(self);
2707
2708 count = countchar(self_s, self_len, from_c);
2709 if (count == 0) {
2710 return return_self(self);
2711 }
2712 if (count > maxcount)
2713 count = maxcount;
2714
2715 result_len = self_len - count; /* from_len == 1 */
2716 assert(result_len>=0);
2717
2718 if ( (result = (PyStringObject *)
2719 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2720 return NULL;
2721 result_s = PyString_AS_STRING(result);
2722
2723 start = self_s;
2724 end = self_s + self_len;
2725 while (count-- > 0) {
2726 next = findchar(start, end-start, from_c);
2727 if (next == NULL)
2728 break;
2729 memcpy(result_s, start, next-start);
2730 result_s += (next-start);
2731 start = next+1;
2732 }
2733 memcpy(result_s, start, end-start);
2734
2735 return result;
2736}
2737
2738/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2739
2740static PyStringObject *
2741replace_delete_substring(PyStringObject *self, PyStringObject *from,
2742 Py_ssize_t maxcount) {
2743 char *self_s, *from_s, *result_s;
2744 char *start, *next, *end;
2745 Py_ssize_t self_len, from_len, result_len;
2746 Py_ssize_t count, offset;
2747 PyStringObject *result;
2748
2749 self_len = PyString_GET_SIZE(self);
2750 self_s = PyString_AS_STRING(self);
2751 from_len = PyString_GET_SIZE(from);
2752 from_s = PyString_AS_STRING(from);
2753
2754 count = countstring(self_s, self_len,
2755 from_s, from_len,
2756 0, self_len, 1);
2757
2758 if (count > maxcount)
2759 count = maxcount;
2760
2761 if (count == 0) {
2762 /* no matches */
2763 return return_self(self);
2764 }
2765
2766 result_len = self_len - (count * from_len);
2767 assert (result_len>=0);
2768
2769 if ( (result = (PyStringObject *)
2770 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2771 return NULL;
2772
2773 result_s = PyString_AS_STRING(result);
2774
2775 start = self_s;
2776 end = self_s + self_len;
2777 while (count-- > 0) {
2778 offset = findstring(start, end-start,
2779 from_s, from_len,
2780 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002781 if (offset == -1)
2782 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783 next = start + offset;
2784
2785 memcpy(result_s, start, next-start);
2786
2787 result_s += (next-start);
2788 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002789 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002790 memcpy(result_s, start, end-start);
2791 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002792}
2793
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002794/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2795static PyStringObject *
2796replace_single_character_in_place(PyStringObject *self,
2797 char from_c, char to_c,
2798 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002799{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 char *self_s, *result_s, *start, *end, *next;
2801 Py_ssize_t self_len;
2802 PyStringObject *result;
2803
2804 /* The result string will be the same size */
2805 self_s = PyString_AS_STRING(self);
2806 self_len = PyString_GET_SIZE(self);
2807
2808 next = findchar(self_s, self_len, from_c);
2809
2810 if (next == NULL) {
2811 /* No matches; return the original string */
2812 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002813 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814
2815 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002816 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002817 if (result == NULL)
2818 return NULL;
2819 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002820 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821
2822 /* change everything in-place, starting with this one */
2823 start = result_s + (next-self_s);
2824 *start = to_c;
2825 start++;
2826 end = result_s + self_len;
2827
2828 while (--maxcount > 0) {
2829 next = findchar(start, end-start, from_c);
2830 if (next == NULL)
2831 break;
2832 *next = to_c;
2833 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002834 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835
2836 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002837}
2838
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2840static PyStringObject *
2841replace_substring_in_place(PyStringObject *self,
2842 PyStringObject *from,
2843 PyStringObject *to,
2844 Py_ssize_t maxcount)
2845{
2846 char *result_s, *start, *end;
2847 char *self_s, *from_s, *to_s;
2848 Py_ssize_t self_len, from_len, offset;
2849 PyStringObject *result;
2850
2851 /* The result string will be the same size */
2852
2853 self_s = PyString_AS_STRING(self);
2854 self_len = PyString_GET_SIZE(self);
2855
2856 from_s = PyString_AS_STRING(from);
2857 from_len = PyString_GET_SIZE(from);
2858 to_s = PyString_AS_STRING(to);
2859
2860 offset = findstring(self_s, self_len,
2861 from_s, from_len,
2862 0, self_len, FORWARD);
2863
2864 if (offset == -1) {
2865 /* No matches; return the original string */
2866 return return_self(self);
2867 }
2868
2869 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002870 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002871 if (result == NULL)
2872 return NULL;
2873 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002874 memcpy(result_s, self_s, self_len);
2875
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002876
2877 /* change everything in-place, starting with this one */
2878 start = result_s + offset;
2879 memcpy(start, to_s, from_len);
2880 start += from_len;
2881 end = result_s + self_len;
2882
2883 while ( --maxcount > 0) {
2884 offset = findstring(start, end-start,
2885 from_s, from_len,
2886 0, end-start, FORWARD);
2887 if (offset==-1)
2888 break;
2889 memcpy(start+offset, to_s, from_len);
2890 start += offset+from_len;
2891 }
2892
2893 return result;
2894}
2895
2896/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2897static PyStringObject *
2898replace_single_character(PyStringObject *self,
2899 char from_c,
2900 PyStringObject *to,
2901 Py_ssize_t maxcount)
2902{
2903 char *self_s, *to_s, *result_s;
2904 char *start, *next, *end;
2905 Py_ssize_t self_len, to_len, result_len;
2906 Py_ssize_t count, product;
2907 PyStringObject *result;
2908
2909 self_s = PyString_AS_STRING(self);
2910 self_len = PyString_GET_SIZE(self);
2911
2912 count = countchar(self_s, self_len, from_c);
2913 if (count > maxcount)
2914 count = maxcount;
2915
2916 if (count == 0) {
2917 /* no matches, return unchanged */
2918 return return_self(self);
2919 }
2920
2921 to_s = PyString_AS_STRING(to);
2922 to_len = PyString_GET_SIZE(to);
2923
2924 /* use the difference between current and new, hence the "-1" */
2925 /* result_len = self_len + count * (to_len-1) */
2926 product = count * (to_len-1);
2927 if (product / (to_len-1) != count) {
2928 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2929 return NULL;
2930 }
2931 result_len = self_len + product;
2932 if (result_len < 0) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
2936
2937 if ( (result = (PyStringObject *)
2938 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2939 return NULL;
2940 result_s = PyString_AS_STRING(result);
2941
2942 start = self_s;
2943 end = self_s + self_len;
2944 while (count-- > 0) {
2945 next = findchar(start, end-start, from_c);
2946 if (next == NULL)
2947 break;
2948
2949 if (next == start) {
2950 /* replace with the 'to' */
2951 memcpy(result_s, to_s, to_len);
2952 result_s += to_len;
2953 start += 1;
2954 } else {
2955 /* copy the unchanged old then the 'to' */
2956 memcpy(result_s, start, next-start);
2957 result_s += (next-start);
2958 memcpy(result_s, to_s, to_len);
2959 result_s += to_len;
2960 start = next+1;
2961 }
2962 }
2963 /* Copy the remainder of the remaining string */
2964 memcpy(result_s, start, end-start);
2965
2966 return result;
2967}
2968
2969/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2970static PyStringObject *
2971replace_substring(PyStringObject *self,
2972 PyStringObject *from,
2973 PyStringObject *to,
2974 Py_ssize_t maxcount) {
2975 char *self_s, *from_s, *to_s, *result_s;
2976 char *start, *next, *end;
2977 Py_ssize_t self_len, from_len, to_len, result_len;
2978 Py_ssize_t count, offset, product;
2979 PyStringObject *result;
2980
2981 self_s = PyString_AS_STRING(self);
2982 self_len = PyString_GET_SIZE(self);
2983 from_s = PyString_AS_STRING(from);
2984 from_len = PyString_GET_SIZE(from);
2985
2986 count = countstring(self_s, self_len,
2987 from_s, from_len,
2988 0, self_len, FORWARD);
2989 if (count > maxcount)
2990 count = maxcount;
2991
2992 if (count == 0) {
2993 /* no matches, return unchanged */
2994 return return_self(self);
2995 }
2996
2997 to_s = PyString_AS_STRING(to);
2998 to_len = PyString_GET_SIZE(to);
2999
3000 /* Check for overflow */
3001 /* result_len = self_len + count * (to_len-from_len) */
3002 product = count * (to_len-from_len);
3003 if (product / (to_len-from_len) != count) {
3004 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3005 return NULL;
3006 }
3007 result_len = self_len + product;
3008 if (result_len < 0) {
3009 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010 return NULL;
3011 }
3012
3013 if ( (result = (PyStringObject *)
3014 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3015 return NULL;
3016 result_s = PyString_AS_STRING(result);
3017
3018 start = self_s;
3019 end = self_s + self_len;
3020 while (count-- > 0) {
3021 offset = findstring(start, end-start,
3022 from_s, from_len,
3023 0, end-start, FORWARD);
3024 if (offset == -1)
3025 break;
3026 next = start+offset;
3027 if (next == start) {
3028 /* replace with the 'to' */
3029 memcpy(result_s, to_s, to_len);
3030 result_s += to_len;
3031 start += from_len;
3032 } else {
3033 /* copy the unchanged old then the 'to' */
3034 memcpy(result_s, start, next-start);
3035 result_s += (next-start);
3036 memcpy(result_s, to_s, to_len);
3037 result_s += to_len;
3038 start = next+from_len;
3039 }
3040 }
3041 /* Copy the remainder of the remaining string */
3042 memcpy(result_s, start, end-start);
3043
3044 return result;
3045}
3046
3047
3048static PyStringObject *
3049replace(PyStringObject *self,
3050 PyStringObject *from,
3051 PyStringObject *to,
3052 Py_ssize_t maxcount)
3053{
3054 Py_ssize_t from_len, to_len;
3055
3056 if (maxcount < 0) {
3057 maxcount = PY_SSIZE_T_MAX;
3058 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3059 /* nothing to do; return the original string */
3060 return return_self(self);
3061 }
3062
3063 from_len = PyString_GET_SIZE(from);
3064 to_len = PyString_GET_SIZE(to);
3065
3066 if (maxcount == 0 ||
3067 (from_len == 0 && to_len == 0)) {
3068 /* nothing to do; return the original string */
3069 return return_self(self);
3070 }
3071
3072 /* Handle zero-length special cases */
3073
3074 if (from_len == 0) {
3075 /* insert the 'to' string everywhere. */
3076 /* >>> "Python".replace("", ".") */
3077 /* '.P.y.t.h.o.n.' */
3078 return replace_interleave(self, to, maxcount);
3079 }
3080
3081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3082 /* point for an empty self string to generate a non-empty string */
3083 /* Special case so the remaining code always gets a non-empty string */
3084 if (PyString_GET_SIZE(self) == 0) {
3085 return return_self(self);
3086 }
3087
3088 if (to_len == 0) {
3089 /* delete all occurances of 'from' string */
3090 if (from_len == 1) {
3091 return replace_delete_single_character(
3092 self, PyString_AS_STRING(from)[0], maxcount);
3093 } else {
3094 return replace_delete_substring(self, from, maxcount);
3095 }
3096 }
3097
3098 /* Handle special case where both strings have the same length */
3099
3100 if (from_len == to_len) {
3101 if (from_len == 1) {
3102 return replace_single_character_in_place(
3103 self,
3104 PyString_AS_STRING(from)[0],
3105 PyString_AS_STRING(to)[0],
3106 maxcount);
3107 } else {
3108 return replace_substring_in_place(
3109 self, from, to, maxcount);
3110 }
3111 }
3112
3113 /* Otherwise use the more generic algorithms */
3114 if (from_len == 1) {
3115 return replace_single_character(self, PyString_AS_STRING(from)[0],
3116 to, maxcount);
3117 } else {
3118 /* len('from')>=2, len('to')>=1 */
3119 return replace_substring(self, from, to, maxcount);
3120 }
3121}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003122
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003123PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003124"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003125\n\
3126Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003127old replaced by new. If the optional argument count is\n\
3128given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003129
3130static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003131string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003132{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003133 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003134 PyObject *from, *to;
3135 char *tmp_s;
3136 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003138 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003141 if (PyString_Check(from)) {
3142 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003144#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003145 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003146 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003147 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003148#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003149 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 return NULL;
3151
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003152 if (PyString_Check(to)) {
3153 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003155#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003156 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003157 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003158 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003159#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003160 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 return NULL;
3162
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003163 return (PyObject *)replace((PyStringObject *) self,
3164 (PyStringObject *) from,
3165 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003166}
3167
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003168/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003169
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003170PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003171"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003173Return True if S starts with the specified prefix, False otherwise.\n\
3174With optional start, test S beginning at that position.\n\
3175With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003176
3177static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003178string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003180 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003181 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003183 Py_ssize_t plen;
3184 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003185 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003187
Guido van Rossumc6821402000-05-08 14:08:05 +00003188 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3189 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003190 return NULL;
3191 if (PyString_Check(subobj)) {
3192 prefix = PyString_AS_STRING(subobj);
3193 plen = PyString_GET_SIZE(subobj);
3194 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003195#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003196 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003197 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003198 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003199 subobj, start, end, -1);
3200 if (rc == -1)
3201 return NULL;
3202 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003203 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003204 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003205#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003206 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003207 return NULL;
3208
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003209 string_adjust_indices(&start, &end, len);
3210
3211 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003212 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003214 if (end-start >= plen)
3215 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3216 else
3217 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003218}
3219
3220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003221PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003222"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003223\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003224Return True if S ends with the specified suffix, False otherwise.\n\
3225With optional start, test S beginning at that position.\n\
3226With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003227
3228static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003229string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003230{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003231 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003232 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003234 Py_ssize_t slen;
3235 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003236 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003237 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003238
Guido van Rossumc6821402000-05-08 14:08:05 +00003239 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3240 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003241 return NULL;
3242 if (PyString_Check(subobj)) {
3243 suffix = PyString_AS_STRING(subobj);
3244 slen = PyString_GET_SIZE(subobj);
3245 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003246#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003247 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003248 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003249 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003250 subobj, start, end, +1);
3251 if (rc == -1)
3252 return NULL;
3253 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003254 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003255 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003256#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003257 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003258 return NULL;
3259
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003260 string_adjust_indices(&start, &end, len);
3261
3262 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003263 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003264
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003265 if (end-slen > start)
3266 start = end - slen;
3267 if (end-start >= slen)
3268 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3269 else
3270 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003271}
3272
3273
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003274PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003275"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003276\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003277Encodes S using the codec registered for encoding. encoding defaults\n\
3278to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003279handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003280a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3281'xmlcharrefreplace' as well as any other name registered with\n\
3282codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003283
3284static PyObject *
3285string_encode(PyStringObject *self, PyObject *args)
3286{
3287 char *encoding = NULL;
3288 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003289 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003290
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003291 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3292 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003293 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003294 if (v == NULL)
3295 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003296 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3297 PyErr_Format(PyExc_TypeError,
3298 "encoder did not return a string/unicode object "
3299 "(type=%.400s)",
3300 v->ob_type->tp_name);
3301 Py_DECREF(v);
3302 return NULL;
3303 }
3304 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003305
3306 onError:
3307 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003308}
3309
3310
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003311PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003312"S.decode([encoding[,errors]]) -> object\n\
3313\n\
3314Decodes S using the codec registered for encoding. encoding defaults\n\
3315to the default encoding. errors may be given to set a different error\n\
3316handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003317a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3318as well as any other name registerd with codecs.register_error that is\n\
3319able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003320
3321static PyObject *
3322string_decode(PyStringObject *self, PyObject *args)
3323{
3324 char *encoding = NULL;
3325 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003326 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003327
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003328 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3329 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003330 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003331 if (v == NULL)
3332 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003333 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3334 PyErr_Format(PyExc_TypeError,
3335 "decoder did not return a string/unicode object "
3336 "(type=%.400s)",
3337 v->ob_type->tp_name);
3338 Py_DECREF(v);
3339 return NULL;
3340 }
3341 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003342
3343 onError:
3344 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003345}
3346
3347
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003348PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003349"S.expandtabs([tabsize]) -> string\n\
3350\n\
3351Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003352If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003353
3354static PyObject*
3355string_expandtabs(PyStringObject *self, PyObject *args)
3356{
3357 const char *e, *p;
3358 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003359 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003360 PyObject *u;
3361 int tabsize = 8;
3362
3363 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3364 return NULL;
3365
Thomas Wouters7e474022000-07-16 12:04:32 +00003366 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003367 i = j = 0;
3368 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3369 for (p = PyString_AS_STRING(self); p < e; p++)
3370 if (*p == '\t') {
3371 if (tabsize > 0)
3372 j += tabsize - (j % tabsize);
3373 }
3374 else {
3375 j++;
3376 if (*p == '\n' || *p == '\r') {
3377 i += j;
3378 j = 0;
3379 }
3380 }
3381
3382 /* Second pass: create output string and fill it */
3383 u = PyString_FromStringAndSize(NULL, i + j);
3384 if (!u)
3385 return NULL;
3386
3387 j = 0;
3388 q = PyString_AS_STRING(u);
3389
3390 for (p = PyString_AS_STRING(self); p < e; p++)
3391 if (*p == '\t') {
3392 if (tabsize > 0) {
3393 i = tabsize - (j % tabsize);
3394 j += i;
3395 while (i--)
3396 *q++ = ' ';
3397 }
3398 }
3399 else {
3400 j++;
3401 *q++ = *p;
3402 if (*p == '\n' || *p == '\r')
3403 j = 0;
3404 }
3405
3406 return u;
3407}
3408
Tim Peters8fa5dd02001-09-12 02:18:30 +00003409static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003410pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411{
3412 PyObject *u;
3413
3414 if (left < 0)
3415 left = 0;
3416 if (right < 0)
3417 right = 0;
3418
Tim Peters8fa5dd02001-09-12 02:18:30 +00003419 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003420 Py_INCREF(self);
3421 return (PyObject *)self;
3422 }
3423
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003424 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425 left + PyString_GET_SIZE(self) + right);
3426 if (u) {
3427 if (left)
3428 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003429 memcpy(PyString_AS_STRING(u) + left,
3430 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431 PyString_GET_SIZE(self));
3432 if (right)
3433 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3434 fill, right);
3435 }
3436
3437 return u;
3438}
3439
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003440PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003441"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003442"\n"
3443"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003444"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445
3446static PyObject *
3447string_ljust(PyStringObject *self, PyObject *args)
3448{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003449 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003450 char fillchar = ' ';
3451
Thomas Wouters4abb3662006-04-19 14:50:15 +00003452 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003453 return NULL;
3454
Tim Peters8fa5dd02001-09-12 02:18:30 +00003455 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456 Py_INCREF(self);
3457 return (PyObject*) self;
3458 }
3459
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003460 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461}
3462
3463
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003464PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003465"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003466"\n"
3467"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003468"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469
3470static PyObject *
3471string_rjust(PyStringObject *self, PyObject *args)
3472{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003473 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003474 char fillchar = ' ';
3475
Thomas Wouters4abb3662006-04-19 14:50:15 +00003476 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003477 return NULL;
3478
Tim Peters8fa5dd02001-09-12 02:18:30 +00003479 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003480 Py_INCREF(self);
3481 return (PyObject*) self;
3482 }
3483
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003484 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003485}
3486
3487
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003488PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003489"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003490"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003491"Return S centered in a string of length width. Padding is\n"
3492"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493
3494static PyObject *
3495string_center(PyStringObject *self, PyObject *args)
3496{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003497 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003498 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003499 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500
Thomas Wouters4abb3662006-04-19 14:50:15 +00003501 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502 return NULL;
3503
Tim Peters8fa5dd02001-09-12 02:18:30 +00003504 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505 Py_INCREF(self);
3506 return (PyObject*) self;
3507 }
3508
3509 marg = width - PyString_GET_SIZE(self);
3510 left = marg / 2 + (marg & width & 1);
3511
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003512 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003513}
3514
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003515PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003516"S.zfill(width) -> string\n"
3517"\n"
3518"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003519"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003520
3521static PyObject *
3522string_zfill(PyStringObject *self, PyObject *args)
3523{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003524 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003525 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003526 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003527 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003528
Thomas Wouters4abb3662006-04-19 14:50:15 +00003529 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003530 return NULL;
3531
3532 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003533 if (PyString_CheckExact(self)) {
3534 Py_INCREF(self);
3535 return (PyObject*) self;
3536 }
3537 else
3538 return PyString_FromStringAndSize(
3539 PyString_AS_STRING(self),
3540 PyString_GET_SIZE(self)
3541 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003542 }
3543
3544 fill = width - PyString_GET_SIZE(self);
3545
3546 s = pad(self, fill, 0, '0');
3547
3548 if (s == NULL)
3549 return NULL;
3550
3551 p = PyString_AS_STRING(s);
3552 if (p[fill] == '+' || p[fill] == '-') {
3553 /* move sign to beginning of string */
3554 p[0] = p[fill];
3555 p[fill] = '0';
3556 }
3557
3558 return (PyObject*) s;
3559}
3560
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003561PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003562"S.isspace() -> bool\n\
3563\n\
3564Return True if all characters in S are whitespace\n\
3565and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566
3567static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003568string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003569{
Fred Drakeba096332000-07-09 07:04:36 +00003570 register const unsigned char *p
3571 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003572 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003573
Guido van Rossum4c08d552000-03-10 22:55:18 +00003574 /* Shortcut for single character strings */
3575 if (PyString_GET_SIZE(self) == 1 &&
3576 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003578
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003579 /* Special case for empty strings */
3580 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003582
Guido van Rossum4c08d552000-03-10 22:55:18 +00003583 e = p + PyString_GET_SIZE(self);
3584 for (; p < e; p++) {
3585 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003587 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589}
3590
3591
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003592PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003595Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003596and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597
3598static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003599string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003600{
Fred Drakeba096332000-07-09 07:04:36 +00003601 register const unsigned char *p
3602 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003603 register const unsigned char *e;
3604
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605 /* Shortcut for single character strings */
3606 if (PyString_GET_SIZE(self) == 1 &&
3607 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003609
3610 /* Special case for empty strings */
3611 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003613
3614 e = p + PyString_GET_SIZE(self);
3615 for (; p < e; p++) {
3616 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003618 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003620}
3621
3622
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003623PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003625\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003626Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003627and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003628
3629static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003630string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003631{
Fred Drakeba096332000-07-09 07:04:36 +00003632 register const unsigned char *p
3633 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003634 register const unsigned char *e;
3635
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003636 /* Shortcut for single character strings */
3637 if (PyString_GET_SIZE(self) == 1 &&
3638 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003639 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003640
3641 /* Special case for empty strings */
3642 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003644
3645 e = p + PyString_GET_SIZE(self);
3646 for (; p < e; p++) {
3647 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003649 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003651}
3652
3653
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003654PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003655"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003657Return True if all characters in S are digits\n\
3658and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659
3660static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003661string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662{
Fred Drakeba096332000-07-09 07:04:36 +00003663 register const unsigned char *p
3664 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003665 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667 /* Shortcut for single character strings */
3668 if (PyString_GET_SIZE(self) == 1 &&
3669 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003672 /* Special case for empty strings */
3673 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003675
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 e = p + PyString_GET_SIZE(self);
3677 for (; p < e; p++) {
3678 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682}
3683
3684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003685PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003686"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003689at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003690
3691static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003692string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693{
Fred Drakeba096332000-07-09 07:04:36 +00003694 register const unsigned char *p
3695 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003696 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697 int cased;
3698
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 /* Shortcut for single character strings */
3700 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003703 /* Special case for empty strings */
3704 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003705 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003706
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707 e = p + PyString_GET_SIZE(self);
3708 cased = 0;
3709 for (; p < e; p++) {
3710 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003711 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 else if (!cased && islower(*p))
3713 cased = 1;
3714 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003715 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716}
3717
3718
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003719PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003722Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003723at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724
3725static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003726string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003727{
Fred Drakeba096332000-07-09 07:04:36 +00003728 register const unsigned char *p
3729 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003730 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 int cased;
3732
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733 /* Shortcut for single character strings */
3734 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003735 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003737 /* Special case for empty strings */
3738 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003739 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003740
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741 e = p + PyString_GET_SIZE(self);
3742 cased = 0;
3743 for (; p < e; p++) {
3744 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003745 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 else if (!cased && isupper(*p))
3747 cased = 1;
3748 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003749 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750}
3751
3752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003753PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003756Return True if S is a titlecased string and there is at least one\n\
3757character in S, i.e. uppercase characters may only follow uncased\n\
3758characters and lowercase characters only cased ones. Return False\n\
3759otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760
3761static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003762string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763{
Fred Drakeba096332000-07-09 07:04:36 +00003764 register const unsigned char *p
3765 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003766 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767 int cased, previous_is_cased;
3768
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769 /* Shortcut for single character strings */
3770 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003771 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003773 /* Special case for empty strings */
3774 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003775 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003776
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 e = p + PyString_GET_SIZE(self);
3778 cased = 0;
3779 previous_is_cased = 0;
3780 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003781 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782
3783 if (isupper(ch)) {
3784 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003785 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 previous_is_cased = 1;
3787 cased = 1;
3788 }
3789 else if (islower(ch)) {
3790 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003791 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792 previous_is_cased = 1;
3793 cased = 1;
3794 }
3795 else
3796 previous_is_cased = 0;
3797 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003798 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799}
3800
3801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003802PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003803"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804\n\
3805Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003806Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003807is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809static PyObject*
3810string_splitlines(PyStringObject *self, PyObject *args)
3811{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003812 register Py_ssize_t i;
3813 register Py_ssize_t j;
3814 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003815 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816 PyObject *list;
3817 PyObject *str;
3818 char *data;
3819
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003820 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 return NULL;
3822
3823 data = PyString_AS_STRING(self);
3824 len = PyString_GET_SIZE(self);
3825
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 list = PyList_New(0);
3827 if (!list)
3828 goto onError;
3829
3830 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003831 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003832
Guido van Rossum4c08d552000-03-10 22:55:18 +00003833 /* Find a line and append it */
3834 while (i < len && data[i] != '\n' && data[i] != '\r')
3835 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836
3837 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003838 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003839 if (i < len) {
3840 if (data[i] == '\r' && i + 1 < len &&
3841 data[i+1] == '\n')
3842 i += 2;
3843 else
3844 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003845 if (keepends)
3846 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003848 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003849 j = i;
3850 }
3851 if (j < len) {
3852 SPLIT_APPEND(data, j, len);
3853 }
3854
3855 return list;
3856
3857 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003858 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003859 return NULL;
3860}
3861
3862#undef SPLIT_APPEND
3863
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003864static PyObject *
3865string_getnewargs(PyStringObject *v)
3866{
3867 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3868}
3869
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003870
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003871static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003872string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003873 /* Counterparts of the obsolete stropmodule functions; except
3874 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003875 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3876 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003877 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003878 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3879 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003880 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3881 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3882 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3883 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3884 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3885 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3886 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003887 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3888 capitalize__doc__},
3889 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3890 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3891 endswith__doc__},
3892 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3893 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3894 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3895 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3896 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3897 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3898 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3899 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3900 startswith__doc__},
3901 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3902 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3903 swapcase__doc__},
3904 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3905 translate__doc__},
3906 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3907 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3908 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3909 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3910 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3911 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3912 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3913 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3914 expandtabs__doc__},
3915 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3916 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003917 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003918 {NULL, NULL} /* sentinel */
3919};
3920
Jeremy Hylton938ace62002-07-17 16:30:39 +00003921static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003922str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3923
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003924static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003925string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003926{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003927 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003928 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003929
Guido van Rossumae960af2001-08-30 03:11:59 +00003930 if (type != &PyString_Type)
3931 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003932 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3933 return NULL;
3934 if (x == NULL)
3935 return PyString_FromString("");
3936 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003937}
3938
Guido van Rossumae960af2001-08-30 03:11:59 +00003939static PyObject *
3940str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3941{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003942 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003943 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003944
3945 assert(PyType_IsSubtype(type, &PyString_Type));
3946 tmp = string_new(&PyString_Type, args, kwds);
3947 if (tmp == NULL)
3948 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003949 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003950 n = PyString_GET_SIZE(tmp);
3951 pnew = type->tp_alloc(type, n);
3952 if (pnew != NULL) {
3953 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003954 ((PyStringObject *)pnew)->ob_shash =
3955 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003956 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003957 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003958 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003959 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003960}
3961
Guido van Rossumcacfc072002-05-24 19:01:59 +00003962static PyObject *
3963basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3964{
3965 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003966 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003967 return NULL;
3968}
3969
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003970static PyObject *
3971string_mod(PyObject *v, PyObject *w)
3972{
3973 if (!PyString_Check(v)) {
3974 Py_INCREF(Py_NotImplemented);
3975 return Py_NotImplemented;
3976 }
3977 return PyString_Format(v, w);
3978}
3979
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003980PyDoc_STRVAR(basestring_doc,
3981"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003982
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003983static PyNumberMethods string_as_number = {
3984 0, /*nb_add*/
3985 0, /*nb_subtract*/
3986 0, /*nb_multiply*/
3987 0, /*nb_divide*/
3988 string_mod, /*nb_remainder*/
3989};
3990
3991
Guido van Rossumcacfc072002-05-24 19:01:59 +00003992PyTypeObject PyBaseString_Type = {
3993 PyObject_HEAD_INIT(&PyType_Type)
3994 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003995 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003996 0,
3997 0,
3998 0, /* tp_dealloc */
3999 0, /* tp_print */
4000 0, /* tp_getattr */
4001 0, /* tp_setattr */
4002 0, /* tp_compare */
4003 0, /* tp_repr */
4004 0, /* tp_as_number */
4005 0, /* tp_as_sequence */
4006 0, /* tp_as_mapping */
4007 0, /* tp_hash */
4008 0, /* tp_call */
4009 0, /* tp_str */
4010 0, /* tp_getattro */
4011 0, /* tp_setattro */
4012 0, /* tp_as_buffer */
4013 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4014 basestring_doc, /* tp_doc */
4015 0, /* tp_traverse */
4016 0, /* tp_clear */
4017 0, /* tp_richcompare */
4018 0, /* tp_weaklistoffset */
4019 0, /* tp_iter */
4020 0, /* tp_iternext */
4021 0, /* tp_methods */
4022 0, /* tp_members */
4023 0, /* tp_getset */
4024 &PyBaseObject_Type, /* tp_base */
4025 0, /* tp_dict */
4026 0, /* tp_descr_get */
4027 0, /* tp_descr_set */
4028 0, /* tp_dictoffset */
4029 0, /* tp_init */
4030 0, /* tp_alloc */
4031 basestring_new, /* tp_new */
4032 0, /* tp_free */
4033};
4034
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004035PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004036"str(object) -> string\n\
4037\n\
4038Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004039If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004040
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004041PyTypeObject PyString_Type = {
4042 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004043 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004044 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004045 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004046 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004047 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004048 (printfunc)string_print, /* tp_print */
4049 0, /* tp_getattr */
4050 0, /* tp_setattr */
4051 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004052 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004053 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004054 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004055 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004056 (hashfunc)string_hash, /* tp_hash */
4057 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004058 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004059 PyObject_GenericGetAttr, /* tp_getattro */
4060 0, /* tp_setattro */
4061 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004062 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004063 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004064 string_doc, /* tp_doc */
4065 0, /* tp_traverse */
4066 0, /* tp_clear */
4067 (richcmpfunc)string_richcompare, /* tp_richcompare */
4068 0, /* tp_weaklistoffset */
4069 0, /* tp_iter */
4070 0, /* tp_iternext */
4071 string_methods, /* tp_methods */
4072 0, /* tp_members */
4073 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004074 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004075 0, /* tp_dict */
4076 0, /* tp_descr_get */
4077 0, /* tp_descr_set */
4078 0, /* tp_dictoffset */
4079 0, /* tp_init */
4080 0, /* tp_alloc */
4081 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004082 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004083};
4084
4085void
Fred Drakeba096332000-07-09 07:04:36 +00004086PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004087{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004088 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004089 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004090 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004091 if (w == NULL || !PyString_Check(*pv)) {
4092 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004093 *pv = NULL;
4094 return;
4095 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004096 v = string_concat((PyStringObject *) *pv, w);
4097 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004098 *pv = v;
4099}
4100
Guido van Rossum013142a1994-08-30 08:19:36 +00004101void
Fred Drakeba096332000-07-09 07:04:36 +00004102PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004103{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004104 PyString_Concat(pv, w);
4105 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004106}
4107
4108
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004109/* The following function breaks the notion that strings are immutable:
4110 it changes the size of a string. We get away with this only if there
4111 is only one module referencing the object. You can also think of it
4112 as creating a new string object and destroying the old one, only
4113 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004114 already be known to some other part of the code...
4115 Note that if there's not enough memory to resize the string, the original
4116 string object at *pv is deallocated, *pv is set to NULL, an "out of
4117 memory" exception is set, and -1 is returned. Else (on success) 0 is
4118 returned, and the value in *pv may or may not be the same as on input.
4119 As always, an extra byte is allocated for a trailing \0 byte (newsize
4120 does *not* include that), and a trailing \0 byte is stored.
4121*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004122
4123int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004124_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004125{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004126 register PyObject *v;
4127 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004128 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004129 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4130 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004131 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004132 Py_DECREF(v);
4133 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004134 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004135 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004136 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004137 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004138 _Py_ForgetReference(v);
4139 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004140 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004141 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004142 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004143 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004144 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004145 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004146 _Py_NewReference(*pv);
4147 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004148 sv->ob_size = newsize;
4149 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004150 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004151 return 0;
4152}
Guido van Rossume5372401993-03-16 12:15:04 +00004153
4154/* Helpers for formatstring */
4155
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004156static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004157getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004158{
Thomas Wouters977485d2006-02-16 15:59:12 +00004159 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004160 if (argidx < arglen) {
4161 (*p_argidx)++;
4162 if (arglen < 0)
4163 return args;
4164 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004165 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004166 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004167 PyErr_SetString(PyExc_TypeError,
4168 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004169 return NULL;
4170}
4171
/* Format flag bits; each one corresponds to a flag character of a
   % conversion and the bits can be or-ed together into one mask. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */

4184
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004185static int
Fred Drakeba096332000-07-09 07:04:36 +00004186formatfloat(char *buf, size_t buflen, int flags,
4187 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004188{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004189 /* fmt = '%#.' + `prec` + `type`
4190 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004191 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004192 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004193 x = PyFloat_AsDouble(v);
4194 if (x == -1.0 && PyErr_Occurred()) {
4195 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004196 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004197 }
Guido van Rossume5372401993-03-16 12:15:04 +00004198 if (prec < 0)
4199 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004200 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4201 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004202 /* Worst case length calc to ensure no buffer overrun:
4203
4204 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004205 fmt = %#.<prec>g
4206 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004207 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004208 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004209
4210 'f' formats:
4211 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4212 len = 1 + 50 + 1 + prec = 52 + prec
4213
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004214 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004215 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004216
4217 */
4218 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4219 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004220 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004221 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004222 return -1;
4223 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004224 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4225 (flags&F_ALT) ? "#" : "",
4226 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004227 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004228 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004229}
4230
Tim Peters38fd5b62000-09-21 05:43:11 +00004231/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4232 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4233 * Python's regular ints.
4234 * Return value: a new PyString*, or NULL if error.
4235 * . *pbuf is set to point into it,
4236 * *plen set to the # of chars following that.
4237 * Caller must decref it when done using pbuf.
4238 * The string starting at *pbuf is of the form
4239 * "-"? ("0x" | "0X")? digit+
4240 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004241 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004242 * There will be at least prec digits, zero-filled on the left if
4243 * necessary to get that many.
4244 * val object to be converted
4245 * flags bitmask of format flags; only F_ALT is looked at
4246 * prec minimum number of digits; 0-fill on left if needed
4247 * type a character in [duoxX]; u acts the same as d
4248 *
4249 * CAUTION: o, x and X conversions on regular ints can never
4250 * produce a '-' sign, but can for Python's unbounded ints.
4251 */
4252PyObject*
4253_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4254 char **pbuf, int *plen)
4255{
4256 PyObject *result = NULL;
4257 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004258 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004259 int sign; /* 1 if '-', else 0 */
4260 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004261 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004262 int numdigits; /* len == numnondigits + numdigits */
4263 int numnondigits = 0;
4264
4265 switch (type) {
4266 case 'd':
4267 case 'u':
4268 result = val->ob_type->tp_str(val);
4269 break;
4270 case 'o':
4271 result = val->ob_type->tp_as_number->nb_oct(val);
4272 break;
4273 case 'x':
4274 case 'X':
4275 numnondigits = 2;
4276 result = val->ob_type->tp_as_number->nb_hex(val);
4277 break;
4278 default:
4279 assert(!"'type' not in [duoxX]");
4280 }
4281 if (!result)
4282 return NULL;
4283
4284 /* To modify the string in-place, there can only be one reference. */
4285 if (result->ob_refcnt != 1) {
4286 PyErr_BadInternalCall();
4287 return NULL;
4288 }
4289 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004290 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004291 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004292 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4293 return NULL;
4294 }
4295 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004296 if (buf[len-1] == 'L') {
4297 --len;
4298 buf[len] = '\0';
4299 }
4300 sign = buf[0] == '-';
4301 numnondigits += sign;
4302 numdigits = len - numnondigits;
4303 assert(numdigits > 0);
4304
Tim Petersfff53252001-04-12 18:38:48 +00004305 /* Get rid of base marker unless F_ALT */
4306 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004307 /* Need to skip 0x, 0X or 0. */
4308 int skipped = 0;
4309 switch (type) {
4310 case 'o':
4311 assert(buf[sign] == '0');
4312 /* If 0 is only digit, leave it alone. */
4313 if (numdigits > 1) {
4314 skipped = 1;
4315 --numdigits;
4316 }
4317 break;
4318 case 'x':
4319 case 'X':
4320 assert(buf[sign] == '0');
4321 assert(buf[sign + 1] == 'x');
4322 skipped = 2;
4323 numnondigits -= 2;
4324 break;
4325 }
4326 if (skipped) {
4327 buf += skipped;
4328 len -= skipped;
4329 if (sign)
4330 buf[0] = '-';
4331 }
4332 assert(len == numnondigits + numdigits);
4333 assert(numdigits > 0);
4334 }
4335
4336 /* Fill with leading zeroes to meet minimum width. */
4337 if (prec > numdigits) {
4338 PyObject *r1 = PyString_FromStringAndSize(NULL,
4339 numnondigits + prec);
4340 char *b1;
4341 if (!r1) {
4342 Py_DECREF(result);
4343 return NULL;
4344 }
4345 b1 = PyString_AS_STRING(r1);
4346 for (i = 0; i < numnondigits; ++i)
4347 *b1++ = *buf++;
4348 for (i = 0; i < prec - numdigits; i++)
4349 *b1++ = '0';
4350 for (i = 0; i < numdigits; i++)
4351 *b1++ = *buf++;
4352 *b1 = '\0';
4353 Py_DECREF(result);
4354 result = r1;
4355 buf = PyString_AS_STRING(result);
4356 len = numnondigits + prec;
4357 }
4358
4359 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004360 if (type == 'X') {
4361 /* Need to convert all lower case letters to upper case.
4362 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004363 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004364 if (buf[i] >= 'a' && buf[i] <= 'x')
4365 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004366 }
4367 *pbuf = buf;
4368 *plen = len;
4369 return result;
4370}
4371
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004372static int
Fred Drakeba096332000-07-09 07:04:36 +00004373formatint(char *buf, size_t buflen, int flags,
4374 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004375{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004376 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004377 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4378 + 1 + 1 = 24 */
4379 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004380 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004381 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004382
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004383 x = PyInt_AsLong(v);
4384 if (x == -1 && PyErr_Occurred()) {
4385 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004386 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004387 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004388 if (x < 0 && type == 'u') {
4389 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004390 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004391 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4392 sign = "-";
4393 else
4394 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004395 if (prec < 0)
4396 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004397
4398 if ((flags & F_ALT) &&
4399 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004400 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004401 * of issues that cause pain:
4402 * - when 0 is being converted, the C standard leaves off
4403 * the '0x' or '0X', which is inconsistent with other
4404 * %#x/%#X conversions and inconsistent with Python's
4405 * hex() function
4406 * - there are platforms that violate the standard and
4407 * convert 0 with the '0x' or '0X'
4408 * (Metrowerks, Compaq Tru64)
4409 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004410 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004411 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004412 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004413 * We can achieve the desired consistency by inserting our
4414 * own '0x' or '0X' prefix, and substituting %x/%X in place
4415 * of %#x/%#X.
4416 *
4417 * Note that this is the same approach as used in
4418 * formatint() in unicodeobject.c
4419 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004420 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4421 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004422 }
4423 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004424 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4425 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004426 prec, type);
4427 }
4428
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004429 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4430 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004431 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004432 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004433 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004434 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004435 return -1;
4436 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004437 if (sign[0])
4438 PyOS_snprintf(buf, buflen, fmt, -x);
4439 else
4440 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004441 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004442}
4443
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004444static int
Fred Drakeba096332000-07-09 07:04:36 +00004445formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004446{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004447 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004448 if (PyString_Check(v)) {
4449 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004450 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004451 }
4452 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004453 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004454 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004455 }
4456 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004457 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004458}
4459
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand in any expression (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004469
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004470PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004471PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004472{
4473 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004474 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004475 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004476 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004477 PyObject *result, *orig_args;
4478#ifdef Py_USING_UNICODE
4479 PyObject *v, *w;
4480#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004481 PyObject *dict = NULL;
4482 if (format == NULL || !PyString_Check(format) || args == NULL) {
4483 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004484 return NULL;
4485 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004486 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004487 fmt = PyString_AS_STRING(format);
4488 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004489 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004490 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004491 if (result == NULL)
4492 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004493 res = PyString_AsString(result);
4494 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004495 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004496 argidx = 0;
4497 }
4498 else {
4499 arglen = -1;
4500 argidx = -2;
4501 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004502 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4503 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004504 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004505 while (--fmtcnt >= 0) {
4506 if (*fmt != '%') {
4507 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004508 rescnt = fmtcnt + 100;
4509 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004510 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004511 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004512 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004513 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004514 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004515 }
4516 *res++ = *fmt++;
4517 }
4518 else {
4519 /* Got a format specifier */
4520 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004521 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004522 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004523 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004524 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004525 PyObject *v = NULL;
4526 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004527 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004528 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004529 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004530 char formatbuf[FORMATBUFLEN];
4531 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004532#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004533 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004534 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004535#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004536
Guido van Rossumda9c2711996-12-05 21:58:58 +00004537 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004538 if (*fmt == '(') {
4539 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004540 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004541 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004542 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004543
4544 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004546 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004547 goto error;
4548 }
4549 ++fmt;
4550 --fmtcnt;
4551 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004552 /* Skip over balanced parentheses */
4553 while (pcount > 0 && --fmtcnt >= 0) {
4554 if (*fmt == ')')
4555 --pcount;
4556 else if (*fmt == '(')
4557 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004558 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004559 }
4560 keylen = fmt - keystart - 1;
4561 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004562 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004563 "incomplete format key");
4564 goto error;
4565 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004566 key = PyString_FromStringAndSize(keystart,
4567 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004568 if (key == NULL)
4569 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004570 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004571 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004572 args_owned = 0;
4573 }
4574 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004575 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004576 if (args == NULL) {
4577 goto error;
4578 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004579 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004580 arglen = -1;
4581 argidx = -2;
4582 }
Guido van Rossume5372401993-03-16 12:15:04 +00004583 while (--fmtcnt >= 0) {
4584 switch (c = *fmt++) {
4585 case '-': flags |= F_LJUST; continue;
4586 case '+': flags |= F_SIGN; continue;
4587 case ' ': flags |= F_BLANK; continue;
4588 case '#': flags |= F_ALT; continue;
4589 case '0': flags |= F_ZERO; continue;
4590 }
4591 break;
4592 }
4593 if (c == '*') {
4594 v = getnextarg(args, arglen, &argidx);
4595 if (v == NULL)
4596 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004597 if (!PyInt_Check(v)) {
4598 PyErr_SetString(PyExc_TypeError,
4599 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004600 goto error;
4601 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004602 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004603 if (width < 0) {
4604 flags |= F_LJUST;
4605 width = -width;
4606 }
Guido van Rossume5372401993-03-16 12:15:04 +00004607 if (--fmtcnt >= 0)
4608 c = *fmt++;
4609 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004610 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004611 width = c - '0';
4612 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004613 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004614 if (!isdigit(c))
4615 break;
4616 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004617 PyErr_SetString(
4618 PyExc_ValueError,
4619 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004620 goto error;
4621 }
4622 width = width*10 + (c - '0');
4623 }
4624 }
4625 if (c == '.') {
4626 prec = 0;
4627 if (--fmtcnt >= 0)
4628 c = *fmt++;
4629 if (c == '*') {
4630 v = getnextarg(args, arglen, &argidx);
4631 if (v == NULL)
4632 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 if (!PyInt_Check(v)) {
4634 PyErr_SetString(
4635 PyExc_TypeError,
4636 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004637 goto error;
4638 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004639 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004640 if (prec < 0)
4641 prec = 0;
4642 if (--fmtcnt >= 0)
4643 c = *fmt++;
4644 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004645 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004646 prec = c - '0';
4647 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004648 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004649 if (!isdigit(c))
4650 break;
4651 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004652 PyErr_SetString(
4653 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004654 "prec too big");
4655 goto error;
4656 }
4657 prec = prec*10 + (c - '0');
4658 }
4659 }
4660 } /* prec */
4661 if (fmtcnt >= 0) {
4662 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004663 if (--fmtcnt >= 0)
4664 c = *fmt++;
4665 }
4666 }
4667 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004668 PyErr_SetString(PyExc_ValueError,
4669 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004670 goto error;
4671 }
4672 if (c != '%') {
4673 v = getnextarg(args, arglen, &argidx);
4674 if (v == NULL)
4675 goto error;
4676 }
4677 sign = 0;
4678 fill = ' ';
4679 switch (c) {
4680 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004681 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004682 len = 1;
4683 break;
4684 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004685#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004686 if (PyUnicode_Check(v)) {
4687 fmt = fmt_start;
4688 argidx = argidx_start;
4689 goto unicode;
4690 }
Georg Brandld45014b2005-10-01 17:06:00 +00004691#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004692 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004693#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004694 if (temp != NULL && PyUnicode_Check(temp)) {
4695 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004696 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004697 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004698 goto unicode;
4699 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004700#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004701 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004702 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004703 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004704 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004705 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004706 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004707 if (!PyString_Check(temp)) {
4708 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004709 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004710 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004711 goto error;
4712 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004713 pbuf = PyString_AS_STRING(temp);
4714 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004715 if (prec >= 0 && len > prec)
4716 len = prec;
4717 break;
4718 case 'i':
4719 case 'd':
4720 case 'u':
4721 case 'o':
4722 case 'x':
4723 case 'X':
4724 if (c == 'i')
4725 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004726 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004727 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004728 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004729 prec, c, &pbuf, &ilen);
4730 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004731 if (!temp)
4732 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004733 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004734 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004735 else {
4736 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004737 len = formatint(pbuf,
4738 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004739 flags, prec, c, v);
4740 if (len < 0)
4741 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004742 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004743 }
4744 if (flags & F_ZERO)
4745 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004746 break;
4747 case 'e':
4748 case 'E':
4749 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004750 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004751 case 'g':
4752 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004753 if (c == 'F')
4754 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004755 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004756 len = formatfloat(pbuf, sizeof(formatbuf),
4757 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004758 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004759 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004760 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004761 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004762 fill = '0';
4763 break;
4764 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004765#ifdef Py_USING_UNICODE
4766 if (PyUnicode_Check(v)) {
4767 fmt = fmt_start;
4768 argidx = argidx_start;
4769 goto unicode;
4770 }
4771#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004772 pbuf = formatbuf;
4773 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004774 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004775 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004776 break;
4777 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004778 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004779 "unsupported format character '%c' (0x%x) "
4780 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004781 c, c,
4782 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004783 goto error;
4784 }
4785 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004786 if (*pbuf == '-' || *pbuf == '+') {
4787 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004788 len--;
4789 }
4790 else if (flags & F_SIGN)
4791 sign = '+';
4792 else if (flags & F_BLANK)
4793 sign = ' ';
4794 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004795 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004796 }
4797 if (width < len)
4798 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004799 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004800 reslen -= rescnt;
4801 rescnt = width + fmtcnt + 100;
4802 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004803 if (reslen < 0) {
4804 Py_DECREF(result);
4805 return PyErr_NoMemory();
4806 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004807 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004808 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004809 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004810 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004811 }
4812 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004813 if (fill != ' ')
4814 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004815 rescnt--;
4816 if (width > len)
4817 width--;
4818 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004819 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4820 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004821 assert(pbuf[1] == c);
4822 if (fill != ' ') {
4823 *res++ = *pbuf++;
4824 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004825 }
Tim Petersfff53252001-04-12 18:38:48 +00004826 rescnt -= 2;
4827 width -= 2;
4828 if (width < 0)
4829 width = 0;
4830 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004831 }
4832 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004833 do {
4834 --rescnt;
4835 *res++ = fill;
4836 } while (--width > len);
4837 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004838 if (fill == ' ') {
4839 if (sign)
4840 *res++ = sign;
4841 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004842 (c == 'x' || c == 'X')) {
4843 assert(pbuf[0] == '0');
4844 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004845 *res++ = *pbuf++;
4846 *res++ = *pbuf++;
4847 }
4848 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004849 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004850 res += len;
4851 rescnt -= len;
4852 while (--width >= len) {
4853 --rescnt;
4854 *res++ = ' ';
4855 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004856 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004857 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004858 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004859 goto error;
4860 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004861 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004862 } /* '%' */
4863 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004864 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004865 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004866 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004867 goto error;
4868 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004869 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004870 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004871 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004872 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004873 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004874
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004875#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004876 unicode:
4877 if (args_owned) {
4878 Py_DECREF(args);
4879 args_owned = 0;
4880 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004881 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004882 if (PyTuple_Check(orig_args) && argidx > 0) {
4883 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004884 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004885 v = PyTuple_New(n);
4886 if (v == NULL)
4887 goto error;
4888 while (--n >= 0) {
4889 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4890 Py_INCREF(w);
4891 PyTuple_SET_ITEM(v, n, w);
4892 }
4893 args = v;
4894 } else {
4895 Py_INCREF(orig_args);
4896 args = orig_args;
4897 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004898 args_owned = 1;
4899 /* Take what we have of the result and let the Unicode formatting
4900 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004901 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004902 if (_PyString_Resize(&result, rescnt))
4903 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004904 fmtcnt = PyString_GET_SIZE(format) - \
4905 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004906 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4907 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004908 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004909 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004910 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004911 if (v == NULL)
4912 goto error;
4913 /* Paste what we have (result) to what the Unicode formatting
4914 function returned (v) and return the result (or error) */
4915 w = PyUnicode_Concat(result, v);
4916 Py_DECREF(result);
4917 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004918 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004919 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004920#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004921
Guido van Rossume5372401993-03-16 12:15:04 +00004922 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004923 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004924 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004925 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004926 }
Guido van Rossume5372401993-03-16 12:15:04 +00004927 return NULL;
4928}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004929
Guido van Rossum2a61e741997-01-18 07:55:05 +00004930void
Fred Drakeba096332000-07-09 07:04:36 +00004931PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004932{
4933 register PyStringObject *s = (PyStringObject *)(*p);
4934 PyObject *t;
4935 if (s == NULL || !PyString_Check(s))
4936 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004937 /* If it's a string subclass, we don't really know what putting
4938 it in the interned dict might do. */
4939 if (!PyString_CheckExact(s))
4940 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004941 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004942 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004943 if (interned == NULL) {
4944 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004945 if (interned == NULL) {
4946 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004948 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004949 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004950 t = PyDict_GetItem(interned, (PyObject *)s);
4951 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004952 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004953 Py_DECREF(*p);
4954 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004955 return;
4956 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004957
Armin Rigo79f7ad22004-08-07 19:27:39 +00004958 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004959 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004960 return;
4961 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004962 /* The two references in interned are not counted by refcnt.
4963 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004964 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004965 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004966}
4967
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004968void
4969PyString_InternImmortal(PyObject **p)
4970{
4971 PyString_InternInPlace(p);
4972 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4973 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4974 Py_INCREF(*p);
4975 }
4976}
4977
Guido van Rossum2a61e741997-01-18 07:55:05 +00004978
4979PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004980PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004981{
4982 PyObject *s = PyString_FromString(cp);
4983 if (s == NULL)
4984 return NULL;
4985 PyString_InternInPlace(&s);
4986 return s;
4987}
4988
Guido van Rossum8cf04761997-08-02 02:57:45 +00004989void
Fred Drakeba096332000-07-09 07:04:36 +00004990PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004991{
4992 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004993 for (i = 0; i < UCHAR_MAX + 1; i++) {
4994 Py_XDECREF(characters[i]);
4995 characters[i] = NULL;
4996 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004997 Py_XDECREF(nullstring);
4998 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004999}
Barry Warsawa903ad982001-02-23 16:40:48 +00005000
Barry Warsawa903ad982001-02-23 16:40:48 +00005001void _Py_ReleaseInternedStrings(void)
5002{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005003 PyObject *keys;
5004 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005005 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005006
5007 if (interned == NULL || !PyDict_Check(interned))
5008 return;
5009 keys = PyDict_Keys(interned);
5010 if (keys == NULL || !PyList_Check(keys)) {
5011 PyErr_Clear();
5012 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005013 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005014
5015 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5016 detector, interned strings are not forcibly deallocated; rather, we
5017 give them their stolen references back, and then clear and DECREF
5018 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005019
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005020 fprintf(stderr, "releasing interned strings\n");
5021 n = PyList_GET_SIZE(keys);
5022 for (i = 0; i < n; i++) {
5023 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5024 switch (s->ob_sstate) {
5025 case SSTATE_NOT_INTERNED:
5026 /* XXX Shouldn't happen */
5027 break;
5028 case SSTATE_INTERNED_IMMORTAL:
5029 s->ob_refcnt += 1;
5030 break;
5031 case SSTATE_INTERNED_MORTAL:
5032 s->ob_refcnt += 2;
5033 break;
5034 default:
5035 Py_FatalError("Inconsistent interned string state.");
5036 }
5037 s->ob_sstate = SSTATE_NOT_INTERNED;
5038 }
5039 Py_DECREF(keys);
5040 PyDict_Clear(interned);
5041 Py_DECREF(interned);
5042 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005043}