blob: e74744d369028d40adb8d229b548bc1ef211a352 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Fredrik Lundhaf722372006-05-25 17:55:31 +00008#undef USE_INLINE /* XXX - set via configure? */
9
10#if defined(_MSC_VER) /* this is taken from _sre.c */
11#pragma warning(disable: 4710)
12/* fastest possible local call under MSVC */
13#define LOCAL(type) static __inline type __fastcall
14#elif defined(USE_INLINE)
15#define LOCAL(type) static inline type
16#else
17#define LOCAL(type) static type
18#endif
19
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020#ifdef COUNT_ALLOCS
21int null_strings, one_strings;
22#endif
23
Guido van Rossumc0b618a1997-05-02 03:12:38 +000024static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000025static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
35static PyObject *interned;
36
37
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000065PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000066{
Tim Peters9e897f42001-05-09 07:37:07 +000067 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000068 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 Py_INCREF(op);
74 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000085
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000086 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000087 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000088 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000090 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000092 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (str != NULL)
94 memcpy(op->ob_sval, str, size);
95 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000096 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000104 PyObject *t = (PyObject *)op;
105 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000106 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111}
112
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000114PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115{
Tim Peters62de65b2001-12-06 20:29:32 +0000116 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000117 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000118
119 assert(str != NULL);
120 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000121 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000122 PyErr_SetString(PyExc_OverflowError,
123 "string is too long for a Python string");
124 return NULL;
125 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 if (size == 0 && (op = nullstring) != NULL) {
127#ifdef COUNT_ALLOCS
128 null_strings++;
129#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 Py_INCREF(op);
131 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 }
133 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
134#ifdef COUNT_ALLOCS
135 one_strings++;
136#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000137 Py_INCREF(op);
138 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000140
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000141 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000142 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000143 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000147 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000148 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000149 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000157 PyObject *t = (PyObject *)op;
158 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000159 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000161 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000162 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000164}
165
Barry Warsawdadace02001-08-24 18:32:06 +0000166PyObject *
167PyString_FromFormatV(const char *format, va_list vargs)
168{
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000170 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000171 const char* f;
172 char *s;
173 PyObject* string;
174
Tim Petersc15c4f12001-10-02 21:32:07 +0000175#ifdef VA_LIST_IS_ARRAY
176 memcpy(count, vargs, sizeof(va_list));
177#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000178#ifdef __va_copy
179 __va_copy(count, vargs);
180#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000181 count = vargs;
182#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000183#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
188 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
189 ;
190
Tim Peters8931ff12006-05-13 23:28:20 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000196 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000197
Barry Warsawdadace02001-08-24 18:32:06 +0000198 switch (*f) {
199 case 'c':
200 (void)va_arg(count, int);
201 /* fall through... */
202 case '%':
203 n++;
204 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000205 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000206 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 /* 20 bytes is enough to hold a 64-bit
208 integer. Decimal takes the most space.
209 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000210 n += 20;
211 break;
212 case 's':
213 s = va_arg(count, char*);
214 n += strlen(s);
215 break;
216 case 'p':
217 (void) va_arg(count, int);
218 /* maximum 64-bit pointer representation:
219 * 0xffffffffffffffff
220 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000222 */
223 n += 19;
224 break;
225 default:
226 /* if we stumble upon an unknown
227 formatting code, copy the rest of
228 the format string to the output
229 string. (we cannot just skip the
230 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 n += strlen(p);
233 goto expand;
234 }
235 } else
236 n++;
237 }
238 expand:
239 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000240 /* Since we've analyzed how much space we need for the worst case,
241 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000242 string = PyString_FromStringAndSize(NULL, n);
243 if (!string)
244 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000245
Barry Warsawdadace02001-08-24 18:32:06 +0000246 s = PyString_AsString(string);
247
248 for (f = format; *f; f++) {
249 if (*f == '%') {
250 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000251 Py_ssize_t i;
252 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000253 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000254 /* parse the width.precision part (we're only
255 interested in the precision value, if any) */
256 n = 0;
257 while (isdigit(Py_CHARMASK(*f)))
258 n = (n*10) + *f++ - '0';
259 if (*f == '.') {
260 f++;
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 }
265 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
266 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000267 /* handle the long flag, but only for %ld and %lu.
268 others can be added when necessary. */
269 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000270 longflag = 1;
271 ++f;
272 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000273 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000274 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000275 size_tflag = 1;
276 ++f;
277 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000278
Barry Warsawdadace02001-08-24 18:32:06 +0000279 switch (*f) {
280 case 'c':
281 *s++ = va_arg(vargs, int);
282 break;
283 case 'd':
284 if (longflag)
285 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000286 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
288 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
290 sprintf(s, "%d", va_arg(vargs, int));
291 s += strlen(s);
292 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000293 case 'u':
294 if (longflag)
295 sprintf(s, "%lu",
296 va_arg(vargs, unsigned long));
297 else if (size_tflag)
298 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
299 va_arg(vargs, size_t));
300 else
301 sprintf(s, "%u",
302 va_arg(vargs, unsigned int));
303 s += strlen(s);
304 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000305 case 'i':
306 sprintf(s, "%i", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 'x':
310 sprintf(s, "%x", va_arg(vargs, int));
311 s += strlen(s);
312 break;
313 case 's':
314 p = va_arg(vargs, char*);
315 i = strlen(p);
316 if (n > 0 && i > n)
317 i = n;
318 memcpy(s, p, i);
319 s += i;
320 break;
321 case 'p':
322 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000323 /* %p is ill-defined: ensure leading 0x. */
324 if (s[1] == 'X')
325 s[1] = 'x';
326 else if (s[1] != 'x') {
327 memmove(s+2, s, strlen(s)+1);
328 s[0] = '0';
329 s[1] = 'x';
330 }
Barry Warsawdadace02001-08-24 18:32:06 +0000331 s += strlen(s);
332 break;
333 case '%':
334 *s++ = '%';
335 break;
336 default:
337 strcpy(s, p);
338 s += strlen(s);
339 goto end;
340 }
341 } else
342 *s++ = *f;
343 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000344
Barry Warsawdadace02001-08-24 18:32:06 +0000345 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000347 return string;
348}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000349
Barry Warsawdadace02001-08-24 18:32:06 +0000350PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000351PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000352{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000353 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000354 va_list vargs;
355
356#ifdef HAVE_STDARG_PROTOTYPES
357 va_start(vargs, format);
358#else
359 va_start(vargs);
360#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000361 ret = PyString_FromFormatV(format, vargs);
362 va_end(vargs);
363 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000364}
365
366
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000368 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 const char *encoding,
370 const char *errors)
371{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372 PyObject *v, *str;
373
374 str = PyString_FromStringAndSize(s, size);
375 if (str == NULL)
376 return NULL;
377 v = PyString_AsDecodedString(str, encoding, errors);
378 Py_DECREF(str);
379 return v;
380}
381
382PyObject *PyString_AsDecodedObject(PyObject *str,
383 const char *encoding,
384 const char *errors)
385{
386 PyObject *v;
387
388 if (!PyString_Check(str)) {
389 PyErr_BadArgument();
390 goto onError;
391 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393 if (encoding == NULL) {
394#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#else
397 PyErr_SetString(PyExc_ValueError, "no encoding specified");
398 goto onError;
399#endif
400 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000401
402 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 v = PyCodec_Decode(str, encoding, errors);
404 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000405 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000406
407 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000410 return NULL;
411}
412
413PyObject *PyString_AsDecodedString(PyObject *str,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v;
418
419 v = PyString_AsDecodedObject(str, encoding, errors);
420 if (v == NULL)
421 goto onError;
422
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000423#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000424 /* Convert Unicode to a string using the default encoding */
425 if (PyUnicode_Check(v)) {
426 PyObject *temp = v;
427 v = PyUnicode_AsEncodedString(v, NULL, NULL);
428 Py_DECREF(temp);
429 if (v == NULL)
430 goto onError;
431 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000433 if (!PyString_Check(v)) {
434 PyErr_Format(PyExc_TypeError,
435 "decoder did not return a string object (type=%.400s)",
436 v->ob_type->tp_name);
437 Py_DECREF(v);
438 goto onError;
439 }
440
441 return v;
442
443 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 return NULL;
445}
446
447PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000448 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000453
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000454 str = PyString_FromStringAndSize(s, size);
455 if (str == NULL)
456 return NULL;
457 v = PyString_AsEncodedString(str, encoding, errors);
458 Py_DECREF(str);
459 return v;
460}
461
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 const char *encoding,
464 const char *errors)
465{
466 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000467
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(str)) {
469 PyErr_BadArgument();
470 goto onError;
471 }
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473 if (encoding == NULL) {
474#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#else
477 PyErr_SetString(PyExc_ValueError, "no encoding specified");
478 goto onError;
479#endif
480 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000481
482 /* Encode via the codec registry */
483 v = PyCodec_Encode(str, encoding, errors);
484 if (v == NULL)
485 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
487 return v;
488
489 onError:
490 return NULL;
491}
492
493PyObject *PyString_AsEncodedString(PyObject *str,
494 const char *encoding,
495 const char *errors)
496{
497 PyObject *v;
498
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000499 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000500 if (v == NULL)
501 goto onError;
502
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000503#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000504 /* Convert Unicode to a string using the default encoding */
505 if (PyUnicode_Check(v)) {
506 PyObject *temp = v;
507 v = PyUnicode_AsEncodedString(v, NULL, NULL);
508 Py_DECREF(temp);
509 if (v == NULL)
510 goto onError;
511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000512#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 if (!PyString_Check(v)) {
514 PyErr_Format(PyExc_TypeError,
515 "encoder did not return a string object (type=%.400s)",
516 v->ob_type->tp_name);
517 Py_DECREF(v);
518 goto onError;
519 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000520
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000521 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000522
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000523 onError:
524 return NULL;
525}
526
Guido van Rossum234f9421993-06-17 12:35:49 +0000527static void
Fred Drakeba096332000-07-09 07:04:36 +0000528string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000529{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000530 switch (PyString_CHECK_INTERNED(op)) {
531 case SSTATE_NOT_INTERNED:
532 break;
533
534 case SSTATE_INTERNED_MORTAL:
535 /* revive dead object temporarily for DelItem */
536 op->ob_refcnt = 3;
537 if (PyDict_DelItem(interned, op) != 0)
538 Py_FatalError(
539 "deletion of interned string failed");
540 break;
541
542 case SSTATE_INTERNED_IMMORTAL:
543 Py_FatalError("Immortal interned string died.");
544
545 default:
546 Py_FatalError("Inconsistent interned string state.");
547 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000548 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000549}
550
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000551/* Unescape a backslash-escaped string. If unicode is non-zero,
552 the string is a u-literal. If recode_encoding is non-zero,
553 the string is UTF-8 encoded and should be re-encoded in the
554 specified encoding. */
555
556PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000557 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000559 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000560 const char *recode_encoding)
561{
562 int c;
563 char *p, *buf;
564 const char *end;
565 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000566 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000567 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568 if (v == NULL)
569 return NULL;
570 p = buf = PyString_AsString(v);
571 end = s + len;
572 while (s < end) {
573 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000574 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575#ifdef Py_USING_UNICODE
576 if (recode_encoding && (*s & 0x80)) {
577 PyObject *u, *w;
578 char *r;
579 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000580 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 t = s;
582 /* Decode non-ASCII bytes as UTF-8. */
583 while (t < end && (*t & 0x80)) t++;
584 u = PyUnicode_DecodeUTF8(s, t - s, errors);
585 if(!u) goto failed;
586
587 /* Recode them in target encoding. */
588 w = PyUnicode_AsEncodedString(
589 u, recode_encoding, errors);
590 Py_DECREF(u);
591 if (!w) goto failed;
592
593 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000594 assert(PyString_Check(w));
595 r = PyString_AS_STRING(w);
596 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000597 memcpy(p, r, rn);
598 p += rn;
599 Py_DECREF(w);
600 s = t;
601 } else {
602 *p++ = *s++;
603 }
604#else
605 *p++ = *s++;
606#endif
607 continue;
608 }
609 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000610 if (s==end) {
611 PyErr_SetString(PyExc_ValueError,
612 "Trailing \\ in string");
613 goto failed;
614 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000615 switch (*s++) {
616 /* XXX This assumes ASCII! */
617 case '\n': break;
618 case '\\': *p++ = '\\'; break;
619 case '\'': *p++ = '\''; break;
620 case '\"': *p++ = '\"'; break;
621 case 'b': *p++ = '\b'; break;
622 case 'f': *p++ = '\014'; break; /* FF */
623 case 't': *p++ = '\t'; break;
624 case 'n': *p++ = '\n'; break;
625 case 'r': *p++ = '\r'; break;
626 case 'v': *p++ = '\013'; break; /* VT */
627 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
628 case '0': case '1': case '2': case '3':
629 case '4': case '5': case '6': case '7':
630 c = s[-1] - '0';
631 if ('0' <= *s && *s <= '7') {
632 c = (c<<3) + *s++ - '0';
633 if ('0' <= *s && *s <= '7')
634 c = (c<<3) + *s++ - '0';
635 }
636 *p++ = c;
637 break;
638 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000639 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 && isxdigit(Py_CHARMASK(s[1]))) {
641 unsigned int x = 0;
642 c = Py_CHARMASK(*s);
643 s++;
644 if (isdigit(c))
645 x = c - '0';
646 else if (islower(c))
647 x = 10 + c - 'a';
648 else
649 x = 10 + c - 'A';
650 x = x << 4;
651 c = Py_CHARMASK(*s);
652 s++;
653 if (isdigit(c))
654 x += c - '0';
655 else if (islower(c))
656 x += 10 + c - 'a';
657 else
658 x += 10 + c - 'A';
659 *p++ = x;
660 break;
661 }
662 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000663 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000664 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667 if (strcmp(errors, "replace") == 0) {
668 *p++ = '?';
669 } else if (strcmp(errors, "ignore") == 0)
670 /* do nothing */;
671 else {
672 PyErr_Format(PyExc_ValueError,
673 "decoding error; "
674 "unknown error handling code: %.400s",
675 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#ifndef Py_USING_UNICODE
679 case 'u':
680 case 'U':
681 case 'N':
682 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000683 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 "Unicode escapes not legal "
685 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000686 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 }
688#endif
689 default:
690 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000691 s--;
692 goto non_esc; /* an arbitry number of unescaped
693 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 }
695 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000696 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000698 return v;
699 failed:
700 Py_DECREF(v);
701 return NULL;
702}
703
Martin v. Löwis18e16552006-02-15 17:27:45 +0000704static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705string_getsize(register PyObject *op)
706{
707 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000708 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709 if (PyString_AsStringAndSize(op, &s, &len))
710 return -1;
711 return len;
712}
713
714static /*const*/ char *
715string_getbuffer(register PyObject *op)
716{
717 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000718 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
720 return NULL;
721 return s;
722}
723
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000725PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 if (!PyString_Check(op))
728 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730}
731
732/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000733PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000734{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735 if (!PyString_Check(op))
736 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738}
739
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740int
741PyString_AsStringAndSize(register PyObject *obj,
742 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000743 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000744{
745 if (s == NULL) {
746 PyErr_BadInternalCall();
747 return -1;
748 }
749
750 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000752 if (PyUnicode_Check(obj)) {
753 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
754 if (obj == NULL)
755 return -1;
756 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000757 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000758#endif
759 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_Format(PyExc_TypeError,
761 "expected string or Unicode object, "
762 "%.200s found", obj->ob_type->tp_name);
763 return -1;
764 }
765 }
766
767 *s = PyString_AS_STRING(obj);
768 if (len != NULL)
769 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000770 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000771 PyErr_SetString(PyExc_TypeError,
772 "expected string without null bytes");
773 return -1;
774 }
775 return 0;
776}
777
Fredrik Lundhaf722372006-05-25 17:55:31 +0000778/* -------------------------------------------------------------------- */
779/* Helpers */
780
781#define USE_FAST /* experimental fast search implementation */
782
783/* XXX - this code is copied from unicodeobject.c. we really should
784 refactor the core implementations (see _sre.c for how this can be
785 done), but that'll have to wait -- fredrik */
786
787/* fast search/count implementation, based on a mix between boyer-
788 moore and horspool, with a few more bells and whistles on the top.
789 for some more background, see: http://effbot.org/stringlib */
790
791/* note: fastsearch may access s[n], which isn't a problem when using
792 Python's ordinary string types, but may cause problems if you're
793 using this code in other contexts. also, the count mode returns -1
Andrew M. Kuchlingf344c942006-05-25 18:11:16 +0000794 if there cannot possibly be a match in the target string, and 0 if
Fredrik Lundhaf722372006-05-25 17:55:31 +0000795 it has actually checked for matches, but didn't find any. callers
796 beware! */
797
798#define FAST_COUNT 0
799#define FAST_SEARCH 1
800
801LOCAL(Py_ssize_t)
802 fastsearch(const unsigned char* s, Py_ssize_t n, const unsigned char* p,
803 Py_ssize_t m, int mode)
804{
805 long mask;
806 int skip, count = 0;
807 Py_ssize_t i, j, mlast, w;
808
809 w = n - m;
810
811 if (w < 0)
812 return -1;
813
814 /* look for special cases */
815 if (m <= 1) {
816 if (m <= 0)
817 return -1;
818 /* use special case for 1-character strings */
819 if (mode == FAST_COUNT) {
820 for (i = 0; i < n; i++)
821 if (s[i] == p[0])
822 count++;
823 return count;
824 } else {
825 for (i = 0; i < n; i++)
826 if (s[i] == p[0])
827 return i;
828 }
829 return -1;
830 }
831
832 mlast = m - 1;
833
834 /* create compressed boyer-moore delta 1 table */
835 skip = mlast - 1;
836 /* process pattern[:-1] */
837 for (mask = i = 0; i < mlast; i++) {
838 mask |= (1 << (p[i] & 0x1F));
839 if (p[i] == p[mlast])
840 skip = mlast - i - 1;
841 }
842 /* process pattern[-1] outside the loop */
843 mask |= (1 << (p[mlast] & 0x1F));
844
845 for (i = 0; i <= w; i++) {
846 /* note: using mlast in the skip path slows things down on x86 */
847 if (s[i+m-1] == p[m-1]) {
848 /* candidate match */
849 for (j = 0; j < mlast; j++)
850 if (s[i+j] != p[j])
851 break;
852 if (j == mlast) {
853 /* got a match! */
854 if (mode != FAST_COUNT)
855 return i;
856 count++;
857 i = i + mlast;
858 continue;
859 }
860 /* miss: check if next character is part of pattern */
861 if (!(mask & (1 << (s[i+m] & 0x1F))))
862 i = i + m;
863 else {
864 i = i + skip;
865 continue;
866 }
867 } else {
868 /* skip: check if next character is part of pattern */
869 if (!(mask & (1 << (s[i+m] & 0x1F))))
870 i = i + m;
871 }
872 }
873
874 if (mode != FAST_COUNT)
875 return -1;
876 return count;
877}
878
879/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880/* Methods */
881
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000882static int
Fred Drakeba096332000-07-09 07:04:36 +0000883string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000885 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000888
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000889 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000890 if (! PyString_CheckExact(op)) {
891 int ret;
892 /* A str subclass may have its own __str__ method. */
893 op = (PyStringObject *) PyObject_Str((PyObject *)op);
894 if (op == NULL)
895 return -1;
896 ret = string_print(op, fp, flags);
897 Py_DECREF(op);
898 return ret;
899 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000901#ifdef __VMS
902 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
903#else
904 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
905#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000906 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000908
Thomas Wouters7e474022000-07-16 12:04:32 +0000909 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000910 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000911 if (memchr(op->ob_sval, '\'', op->ob_size) &&
912 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000913 quote = '"';
914
915 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 for (i = 0; i < op->ob_size; i++) {
917 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000918 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000919 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000920 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000921 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000922 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000923 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000924 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000925 fprintf(fp, "\\r");
926 else if (c < ' ' || c >= 0x7f)
927 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000928 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000929 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000931 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000932 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933}
934
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000935PyObject *
936PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000938 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000939 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000940 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000941 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000942 PyErr_SetString(PyExc_OverflowError,
943 "string is too large to make repr");
944 }
945 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000947 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 }
949 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000950 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951 register char c;
952 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000953 int quote;
954
Thomas Wouters7e474022000-07-16 12:04:32 +0000955 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000956 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000957 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000958 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000959 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000960 quote = '"';
961
Tim Peters9161c8b2001-12-03 01:55:38 +0000962 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000963 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000965 /* There's at least enough room for a hex escape
966 and a closing quote. */
967 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000969 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000971 else if (c == '\t')
972 *p++ = '\\', *p++ = 't';
973 else if (c == '\n')
974 *p++ = '\\', *p++ = 'n';
975 else if (c == '\r')
976 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000977 else if (c < ' ' || c >= 0x7f) {
978 /* For performance, we don't want to call
979 PyOS_snprintf here (extra layers of
980 function call). */
981 sprintf(p, "\\x%02x", c & 0xff);
982 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000983 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000984 else
985 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000987 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000988 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000990 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000991 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000992 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994}
995
Guido van Rossum189f1df2001-05-01 16:51:53 +0000996static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000997string_repr(PyObject *op)
998{
999 return PyString_Repr(op, 1);
1000}
1001
1002static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +00001003string_str(PyObject *s)
1004{
Tim Petersc9933152001-10-16 20:18:24 +00001005 assert(PyString_Check(s));
1006 if (PyString_CheckExact(s)) {
1007 Py_INCREF(s);
1008 return s;
1009 }
1010 else {
1011 /* Subtype -- return genuine string with the same value. */
1012 PyStringObject *t = (PyStringObject *) s;
1013 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
1014 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001015}
1016
Martin v. Löwis18e16552006-02-15 17:27:45 +00001017static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001018string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001019{
1020 return a->ob_size;
1021}
1022
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001024string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025{
Andrew Dalke598710c2006-05-25 18:18:39 +00001026 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 register PyStringObject *op;
1028 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001029#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001030 if (PyUnicode_Check(bb))
1031 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001032#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001033 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +00001034 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +00001035 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036 return NULL;
1037 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001039 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +00001040 if ((a->ob_size == 0 || b->ob_size == 0) &&
1041 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1042 if (a->ob_size == 0) {
1043 Py_INCREF(bb);
1044 return bb;
1045 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 Py_INCREF(a);
1047 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 }
1049 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +00001050 if (size < 0) {
1051 PyErr_SetString(PyExc_OverflowError,
1052 "strings are too large to concat");
1053 return NULL;
1054 }
1055
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001056 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001057 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001058 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001059 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001060 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001061 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001062 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001063 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1064 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001065 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001066 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067#undef b
1068}
1069
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001070static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001071string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001072{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073 register Py_ssize_t i;
1074 register Py_ssize_t j;
1075 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001077 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 if (n < 0)
1079 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001080 /* watch out for overflows: the size can overflow int,
1081 * and the # of bytes needed can overflow size_t
1082 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001083 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001084 if (n && size / n != a->ob_size) {
1085 PyErr_SetString(PyExc_OverflowError,
1086 "repeated string is too long");
1087 return NULL;
1088 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001089 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 Py_INCREF(a);
1091 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092 }
Tim Peterse7c05322004-06-27 17:24:49 +00001093 nbytes = (size_t)size;
1094 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001095 PyErr_SetString(PyExc_OverflowError,
1096 "repeated string is too long");
1097 return NULL;
1098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001100 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001101 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001102 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001103 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001104 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001105 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001106 op->ob_sval[size] = '\0';
1107 if (a->ob_size == 1 && n > 0) {
1108 memset(op->ob_sval, a->ob_sval[0] , n);
1109 return (PyObject *) op;
1110 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001111 i = 0;
1112 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001113 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1114 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001115 }
1116 while (i < size) {
1117 j = (i <= size-i) ? i : size-i;
1118 memcpy(op->ob_sval+i, op->ob_sval, j);
1119 i += j;
1120 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001121 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001122}
1123
1124/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1125
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001126static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001127string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001128 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001129 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001130{
1131 if (i < 0)
1132 i = 0;
1133 if (j < 0)
1134 j = 0; /* Avoid signed/unsigned bug in next line */
1135 if (j > a->ob_size)
1136 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001137 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1138 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001139 Py_INCREF(a);
1140 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001141 }
1142 if (j < i)
1143 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001144 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001145}
1146
Guido van Rossum9284a572000-03-07 15:53:43 +00001147static int
Fred Drakeba096332000-07-09 07:04:36 +00001148string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001149{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001150 char *s = PyString_AS_STRING(a);
1151 const char *sub = PyString_AS_STRING(el);
1152 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001153 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001154 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001155 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001156
1157 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001158#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001159 if (PyUnicode_Check(el))
1160 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001161#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001162 if (!PyString_Check(el)) {
1163 PyErr_SetString(PyExc_TypeError,
1164 "'in <string>' requires string as left operand");
1165 return -1;
1166 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001167 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001168
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001169 if (len_sub == 0)
1170 return 1;
Tim Petersae1d0c92006-03-17 03:29:34 +00001171 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001172 substring. When s<last, there is still room for a possible match
1173 and s[0] through s[len_sub-1] will be in bounds.
1174 shortsub is len_sub minus the last character which is checked
1175 separately just before the memcmp(). That check helps prevent
1176 false starts and saves the setup time for memcmp().
1177 */
1178 firstchar = sub[0];
1179 shortsub = len_sub - 1;
1180 lastchar = sub[shortsub];
1181 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1182 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001183 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001184 if (s == NULL)
1185 return 0;
1186 assert(s < last);
1187 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001188 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001189 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001190 }
1191 return 0;
1192}
1193
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001194static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001195string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001196{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001197 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001198 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001199 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001200 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001201 return NULL;
1202 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001203 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001204 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001205 if (v == NULL)
1206 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001207 else {
1208#ifdef COUNT_ALLOCS
1209 one_strings++;
1210#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001211 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001212 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001213 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001214}
1215
Martin v. Löwiscd353062001-05-24 16:56:35 +00001216static PyObject*
1217string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001218{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001219 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001220 Py_ssize_t len_a, len_b;
1221 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001222 PyObject *result;
1223
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001224 /* Make sure both arguments are strings. */
1225 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001226 result = Py_NotImplemented;
1227 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001228 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001229 if (a == b) {
1230 switch (op) {
1231 case Py_EQ:case Py_LE:case Py_GE:
1232 result = Py_True;
1233 goto out;
1234 case Py_NE:case Py_LT:case Py_GT:
1235 result = Py_False;
1236 goto out;
1237 }
1238 }
1239 if (op == Py_EQ) {
1240 /* Supporting Py_NE here as well does not save
1241 much time, since Py_NE is rarely used. */
1242 if (a->ob_size == b->ob_size
1243 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001244 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001245 a->ob_size) == 0)) {
1246 result = Py_True;
1247 } else {
1248 result = Py_False;
1249 }
1250 goto out;
1251 }
1252 len_a = a->ob_size; len_b = b->ob_size;
1253 min_len = (len_a < len_b) ? len_a : len_b;
1254 if (min_len > 0) {
1255 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1256 if (c==0)
1257 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1258 }else
1259 c = 0;
1260 if (c == 0)
1261 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1262 switch (op) {
1263 case Py_LT: c = c < 0; break;
1264 case Py_LE: c = c <= 0; break;
1265 case Py_EQ: assert(0); break; /* unreachable */
1266 case Py_NE: c = c != 0; break;
1267 case Py_GT: c = c > 0; break;
1268 case Py_GE: c = c >= 0; break;
1269 default:
1270 result = Py_NotImplemented;
1271 goto out;
1272 }
1273 result = c ? Py_True : Py_False;
1274 out:
1275 Py_INCREF(result);
1276 return result;
1277}
1278
1279int
1280_PyString_Eq(PyObject *o1, PyObject *o2)
1281{
1282 PyStringObject *a, *b;
1283 a = (PyStringObject*)o1;
1284 b = (PyStringObject*)o2;
1285 return a->ob_size == b->ob_size
1286 && *a->ob_sval == *b->ob_sval
1287 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001288}
1289
Guido van Rossum9bfef441993-03-29 10:43:31 +00001290static long
Fred Drakeba096332000-07-09 07:04:36 +00001291string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001292{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001294 register unsigned char *p;
1295 register long x;
1296
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001297 if (a->ob_shash != -1)
1298 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001299 len = a->ob_size;
1300 p = (unsigned char *) a->ob_sval;
1301 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001302 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001303 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001304 x ^= a->ob_size;
1305 if (x == -1)
1306 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001307 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001308 return x;
1309}
1310
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001311#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1312
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001313static PyObject*
1314string_subscript(PyStringObject* self, PyObject* item)
1315{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001316 PyNumberMethods *nb = item->ob_type->tp_as_number;
1317 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1318 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001319 if (i == -1 && PyErr_Occurred())
1320 return NULL;
1321 if (i < 0)
1322 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001323 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001324 }
1325 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001326 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001327 char* source_buf;
1328 char* result_buf;
1329 PyObject* result;
1330
Tim Petersae1d0c92006-03-17 03:29:34 +00001331 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001332 PyString_GET_SIZE(self),
1333 &start, &stop, &step, &slicelength) < 0) {
1334 return NULL;
1335 }
1336
1337 if (slicelength <= 0) {
1338 return PyString_FromStringAndSize("", 0);
1339 }
1340 else {
1341 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001342 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001343 if (result_buf == NULL)
1344 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001345
Tim Petersae1d0c92006-03-17 03:29:34 +00001346 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001347 cur += step, i++) {
1348 result_buf[i] = source_buf[cur];
1349 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001350
1351 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001352 slicelength);
1353 PyMem_Free(result_buf);
1354 return result;
1355 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001356 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001357 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001358 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001359 "string indices must be integers");
1360 return NULL;
1361 }
1362}
1363
Martin v. Löwis18e16552006-02-15 17:27:45 +00001364static Py_ssize_t
1365string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001366{
1367 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001368 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001369 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001370 return -1;
1371 }
1372 *ptr = (void *)self->ob_sval;
1373 return self->ob_size;
1374}
1375
Martin v. Löwis18e16552006-02-15 17:27:45 +00001376static Py_ssize_t
1377string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001378{
Guido van Rossum045e6881997-09-08 18:30:11 +00001379 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001380 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001381 return -1;
1382}
1383
Martin v. Löwis18e16552006-02-15 17:27:45 +00001384static Py_ssize_t
1385string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001386{
1387 if ( lenp )
1388 *lenp = self->ob_size;
1389 return 1;
1390}
1391
Martin v. Löwis18e16552006-02-15 17:27:45 +00001392static Py_ssize_t
1393string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001394{
1395 if ( index != 0 ) {
1396 PyErr_SetString(PyExc_SystemError,
1397 "accessing non-existent string segment");
1398 return -1;
1399 }
1400 *ptr = self->ob_sval;
1401 return self->ob_size;
1402}
1403
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001404static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001405 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001406 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001407 (ssizeargfunc)string_repeat, /*sq_repeat*/
1408 (ssizeargfunc)string_item, /*sq_item*/
1409 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001410 0, /*sq_ass_item*/
1411 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001412 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001413};
1414
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001415static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001416 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001417 (binaryfunc)string_subscript,
1418 0,
1419};
1420
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001421static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001422 (readbufferproc)string_buffer_getreadbuf,
1423 (writebufferproc)string_buffer_getwritebuf,
1424 (segcountproc)string_buffer_getsegcount,
1425 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001426};
1427
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428
1429
/* Strip-mode selectors; they double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The text after the 3-character "|O:" prefix of the matching format. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001438
/* Helpers for the split functions.  Both expect the enclosing function
   to declare `PyObject *str` and `PyObject *list` and to provide an
   `onError` label.  Each builds the substring data[left:right] and adds
   it to `list`, jumping to onError if either step fails.  The temporary
   reference held in `str` is released on both the success and the
   failure path of the list operation. */

/* Append data[left:right] at the end of `list`. */
#define SPLIT_APPEND(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str) != 0) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    Py_DECREF(str);

/* Insert data[left:right] at the front of `list`. */
#define SPLIT_INSERT(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Insert(list, 0, str) != 0) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    Py_DECREF(str);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462
1463static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001464split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001466 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001467 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468 PyObject *list = PyList_New(0);
1469
1470 if (list == NULL)
1471 return NULL;
1472
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 for (i = j = 0; i < len; ) {
1474 while (i < len && isspace(Py_CHARMASK(s[i])))
1475 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001477 while (i < len && !isspace(Py_CHARMASK(s[i])))
1478 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001480 if (maxsplit-- <= 0)
1481 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001482 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 while (i < len && isspace(Py_CHARMASK(s[i])))
1484 i++;
1485 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486 }
1487 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001488 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001489 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001490 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 Py_DECREF(list);
1494 return NULL;
1495}
1496
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001497static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001498split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001499{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001500 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001501 PyObject *str;
1502 PyObject *list = PyList_New(0);
1503
1504 if (list == NULL)
1505 return NULL;
1506
1507 for (i = j = 0; i < len; ) {
1508 if (s[i] == ch) {
1509 if (maxcount-- <= 0)
1510 break;
1511 SPLIT_APPEND(s, j, i);
1512 i = j = i + 1;
1513 } else
1514 i++;
1515 }
1516 if (j <= len) {
1517 SPLIT_APPEND(s, j, len);
1518 }
1519 return list;
1520
1521 onError:
1522 Py_DECREF(list);
1523 return NULL;
1524}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001526PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527"S.split([sep [,maxsplit]]) -> list of strings\n\
1528\n\
1529Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001530delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001531splits are done. If sep is not specified or is None, any\n\
1532whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533
1534static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001535string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001537 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1538 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001539 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001540 const char *s = PyString_AS_STRING(self), *sub;
1541 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542
Martin v. Löwis9c830762006-04-13 08:37:17 +00001543 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001544 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001545 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001546 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 if (PyString_Check(subobj)) {
1550 sub = PyString_AS_STRING(subobj);
1551 n = PyString_GET_SIZE(subobj);
1552 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001553#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 else if (PyUnicode_Check(subobj))
1555 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001556#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1558 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001559
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001560 if (n == 0) {
1561 PyErr_SetString(PyExc_ValueError, "empty separator");
1562 return NULL;
1563 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001564 else if (n == 1)
1565 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566
1567 list = PyList_New(0);
1568 if (list == NULL)
1569 return NULL;
1570
1571 i = j = 0;
1572 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001573 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001574 if (maxsplit-- <= 0)
1575 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001576 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001577 if (item == NULL)
1578 goto fail;
1579 err = PyList_Append(list, item);
1580 Py_DECREF(item);
1581 if (err < 0)
1582 goto fail;
1583 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001584 }
1585 else
1586 i++;
1587 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001588 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589 if (item == NULL)
1590 goto fail;
1591 err = PyList_Append(list, item);
1592 Py_DECREF(item);
1593 if (err < 0)
1594 goto fail;
1595
1596 return list;
1597
1598 fail:
1599 Py_DECREF(list);
1600 return NULL;
1601}
1602
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001603static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001604rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001605{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001606 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001607 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001608 PyObject *list = PyList_New(0);
1609
1610 if (list == NULL)
1611 return NULL;
1612
1613 for (i = j = len - 1; i >= 0; ) {
1614 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1615 i--;
1616 j = i;
1617 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1618 i--;
1619 if (j > i) {
1620 if (maxsplit-- <= 0)
1621 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001622 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001623 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1624 i--;
1625 j = i;
1626 }
1627 }
1628 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001630 }
1631 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001632 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001633 Py_DECREF(list);
1634 return NULL;
1635}
1636
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001637static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001638rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001639{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001640 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 PyObject *str;
1642 PyObject *list = PyList_New(0);
1643
1644 if (list == NULL)
1645 return NULL;
1646
1647 for (i = j = len - 1; i >= 0; ) {
1648 if (s[i] == ch) {
1649 if (maxcount-- <= 0)
1650 break;
1651 SPLIT_INSERT(s, i + 1, j + 1);
1652 j = i = i - 1;
1653 } else
1654 i--;
1655 }
1656 if (j >= -1) {
1657 SPLIT_INSERT(s, 0, j + 1);
1658 }
1659 return list;
1660
1661 onError:
1662 Py_DECREF(list);
1663 return NULL;
1664}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001665
1666PyDoc_STRVAR(rsplit__doc__,
1667"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1668\n\
1669Return a list of the words in the string S, using sep as the\n\
1670delimiter string, starting at the end of the string and working\n\
1671to the front. If maxsplit is given, at most maxsplit splits are\n\
1672done. If sep is not specified or is None, any whitespace string\n\
1673is a separator.");
1674
1675static PyObject *
1676string_rsplit(PyStringObject *self, PyObject *args)
1677{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001678 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1679 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001680 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681 const char *s = PyString_AS_STRING(self), *sub;
1682 PyObject *list, *item, *subobj = Py_None;
1683
Martin v. Löwis9c830762006-04-13 08:37:17 +00001684 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001685 return NULL;
1686 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001687 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001688 if (subobj == Py_None)
1689 return rsplit_whitespace(s, len, maxsplit);
1690 if (PyString_Check(subobj)) {
1691 sub = PyString_AS_STRING(subobj);
1692 n = PyString_GET_SIZE(subobj);
1693 }
1694#ifdef Py_USING_UNICODE
1695 else if (PyUnicode_Check(subobj))
1696 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1697#endif
1698 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1699 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001700
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 if (n == 0) {
1702 PyErr_SetString(PyExc_ValueError, "empty separator");
1703 return NULL;
1704 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001705 else if (n == 1)
1706 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001707
1708 list = PyList_New(0);
1709 if (list == NULL)
1710 return NULL;
1711
1712 j = len;
1713 i = j - n;
1714 while (i >= 0) {
1715 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1716 if (maxsplit-- <= 0)
1717 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001718 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001719 if (item == NULL)
1720 goto fail;
1721 err = PyList_Insert(list, 0, item);
1722 Py_DECREF(item);
1723 if (err < 0)
1724 goto fail;
1725 j = i;
1726 i -= n;
1727 }
1728 else
1729 i--;
1730 }
1731 item = PyString_FromStringAndSize(s, j);
1732 if (item == NULL)
1733 goto fail;
1734 err = PyList_Insert(list, 0, item);
1735 Py_DECREF(item);
1736 if (err < 0)
1737 goto fail;
1738
1739 return list;
1740
1741 fail:
1742 Py_DECREF(list);
1743 return NULL;
1744}
1745
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001747PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748"S.join(sequence) -> string\n\
1749\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001751sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
1753static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001754string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755{
1756 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001757 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001760 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001761 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001762 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001763 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764
Tim Peters19fe14e2001-01-19 03:03:47 +00001765 seq = PySequence_Fast(orig, "");
1766 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001767 return NULL;
1768 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001769
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001770 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001771 if (seqlen == 0) {
1772 Py_DECREF(seq);
1773 return PyString_FromString("");
1774 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001777 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1778 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001779 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001780 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001781 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001783
Raymond Hettinger674f2412004-08-23 23:23:54 +00001784 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001785 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001786 * Do a pre-pass to figure out the total amount of space we'll
1787 * need (sz), see whether any argument is absurd, and defer to
1788 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001789 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001792 item = PySequence_Fast_GET_ITEM(seq, i);
1793 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001794#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001795 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001796 /* Defer to Unicode join.
1797 * CAUTION: There's no gurantee that the
1798 * original sequence can be iterated over
1799 * again, so we must pass seq here.
1800 */
1801 PyObject *result;
1802 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001803 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001804 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001805 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001806#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001807 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001808 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001809 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001810 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001811 Py_DECREF(seq);
1812 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001813 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001814 sz += PyString_GET_SIZE(item);
1815 if (i != 0)
1816 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001817 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001818 PyErr_SetString(PyExc_OverflowError,
1819 "join() is too long for a Python string");
1820 Py_DECREF(seq);
1821 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001823 }
1824
1825 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001826 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001827 if (res == NULL) {
1828 Py_DECREF(seq);
1829 return NULL;
1830 }
1831
1832 /* Catenate everything. */
1833 p = PyString_AS_STRING(res);
1834 for (i = 0; i < seqlen; ++i) {
1835 size_t n;
1836 item = PySequence_Fast_GET_ITEM(seq, i);
1837 n = PyString_GET_SIZE(item);
1838 memcpy(p, PyString_AS_STRING(item), n);
1839 p += n;
1840 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001841 memcpy(p, sep, seplen);
1842 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001843 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001845
Jeremy Hylton49048292000-07-11 03:28:17 +00001846 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848}
1849
Tim Peters52e155e2001-06-16 05:42:57 +00001850PyObject *
1851_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001852{
Tim Petersa7259592001-06-16 05:11:17 +00001853 assert(sep != NULL && PyString_Check(sep));
1854 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001855 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001856}
1857
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001858static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001859string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001860{
1861 if (*end > len)
1862 *end = len;
1863 else if (*end < 0)
1864 *end += len;
1865 if (*end < 0)
1866 *end = 0;
1867 if (*start < 0)
1868 *start += len;
1869 if (*start < 0)
1870 *start = 0;
1871}
1872
Martin v. Löwis18e16552006-02-15 17:27:45 +00001873static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001874string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001876 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001877 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001878 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880
Martin v. Löwis18e16552006-02-15 17:27:45 +00001881 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001882 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001883 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001884 return -2;
1885 if (PyString_Check(subobj)) {
1886 sub = PyString_AS_STRING(subobj);
1887 n = PyString_GET_SIZE(subobj);
1888 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001889#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001890 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001891 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001892#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001893 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894 return -2;
1895
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001896 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897
Guido van Rossum4c08d552000-03-10 22:55:18 +00001898 if (dir > 0) {
1899 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001901 last -= n;
1902 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001903 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001904 return (long)i;
1905 }
1906 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001908
Guido van Rossum4c08d552000-03-10 22:55:18 +00001909 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001910 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001911 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001912 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001913 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001914 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001915
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916 return -1;
1917}
1918
1919
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001920PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921"S.find(sub [,start [,end]]) -> int\n\
1922\n\
1923Return the lowest index in S where substring sub is found,\n\
1924such that sub is contained within s[start,end]. Optional\n\
1925arguments start and end are interpreted as in slice notation.\n\
1926\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001927Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928
1929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001930string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001932 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933 if (result == -2)
1934 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001935 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936}
1937
1938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940"S.index(sub [,start [,end]]) -> int\n\
1941\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001942Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943
1944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001945string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001947 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948 if (result == -2)
1949 return NULL;
1950 if (result == -1) {
1951 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001952 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 return NULL;
1954 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001955 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956}
1957
1958
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001959PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960"S.rfind(sub [,start [,end]]) -> int\n\
1961\n\
1962Return the highest index in S where substring sub is found,\n\
1963such that sub is contained within s[start,end]. Optional\n\
1964arguments start and end are interpreted as in slice notation.\n\
1965\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001966Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967
1968static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001969string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001971 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 if (result == -2)
1973 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975}
1976
1977
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001978PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979"S.rindex(sub [,start [,end]]) -> int\n\
1980\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001981Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982
1983static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001984string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001986 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987 if (result == -2)
1988 return NULL;
1989 if (result == -1) {
1990 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001991 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992 return NULL;
1993 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001994 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995}
1996
1997
1998static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001999do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2000{
2001 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002002 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002003 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002004 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2005 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002006
2007 i = 0;
2008 if (striptype != RIGHTSTRIP) {
2009 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2010 i++;
2011 }
2012 }
2013
2014 j = len;
2015 if (striptype != LEFTSTRIP) {
2016 do {
2017 j--;
2018 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2019 j++;
2020 }
2021
2022 if (i == 0 && j == len && PyString_CheckExact(self)) {
2023 Py_INCREF(self);
2024 return (PyObject*)self;
2025 }
2026 else
2027 return PyString_FromStringAndSize(s+i, j-i);
2028}
2029
2030
2031static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002032do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033{
2034 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002035 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 i = 0;
2038 if (striptype != RIGHTSTRIP) {
2039 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2040 i++;
2041 }
2042 }
2043
2044 j = len;
2045 if (striptype != LEFTSTRIP) {
2046 do {
2047 j--;
2048 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2049 j++;
2050 }
2051
Tim Peters8fa5dd02001-09-12 02:18:30 +00002052 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053 Py_INCREF(self);
2054 return (PyObject*)self;
2055 }
2056 else
2057 return PyString_FromStringAndSize(s+i, j-i);
2058}
2059
2060
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002061static PyObject *
2062do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2063{
2064 PyObject *sep = NULL;
2065
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002066 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002067 return NULL;
2068
2069 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002070 if (PyString_Check(sep))
2071 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002072#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002073 else if (PyUnicode_Check(sep)) {
2074 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2075 PyObject *res;
2076 if (uniself==NULL)
2077 return NULL;
2078 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2079 striptype, sep);
2080 Py_DECREF(uniself);
2081 return res;
2082 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002083#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002084 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002085#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002086 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002087#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002088 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002089#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002090 STRIPNAME(striptype));
2091 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002092 }
2093
2094 return do_strip(self, striptype);
2095}
2096
2097
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002098PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002099"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100\n\
2101Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002102whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002103If chars is given and not None, remove characters in chars instead.\n\
2104If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105
2106static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002107string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002109 if (PyTuple_GET_SIZE(args) == 0)
2110 return do_strip(self, BOTHSTRIP); /* Common case */
2111 else
2112 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113}
2114
2115
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002116PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002117"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002119Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002120If chars is given and not None, remove characters in chars instead.\n\
2121If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122
2123static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002124string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002126 if (PyTuple_GET_SIZE(args) == 0)
2127 return do_strip(self, LEFTSTRIP); /* Common case */
2128 else
2129 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130}
2131
2132
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002133PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002134"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002136Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002137If chars is given and not None, remove characters in chars instead.\n\
2138If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139
2140static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002141string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002143 if (PyTuple_GET_SIZE(args) == 0)
2144 return do_strip(self, RIGHTSTRIP); /* Common case */
2145 else
2146 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147}
2148
2149
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002150PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151"S.lower() -> string\n\
2152\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002153Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002155/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2156#ifndef _tolower
2157#define _tolower tolower
2158#endif
2159
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002161string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002162{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002163 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002164 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002165 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002167 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002168 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002170
2171 s = PyString_AS_STRING(newobj);
2172
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002173 memcpy(s, PyString_AS_STRING(self), n);
2174
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002176 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002177 if (isupper(c))
2178 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002180
Anthony Baxtera6286212006-04-11 07:42:36 +00002181 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182}
2183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002184PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185"S.upper() -> string\n\
2186\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002187Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002189#ifndef _toupper
2190#define _toupper toupper
2191#endif
2192
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002194string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002196 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002197 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002198 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002200 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002201 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002203
2204 s = PyString_AS_STRING(newobj);
2205
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002206 memcpy(s, PyString_AS_STRING(self), n);
2207
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002209 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002210 if (islower(c))
2211 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002213
Anthony Baxtera6286212006-04-11 07:42:36 +00002214 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215}
2216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002218"S.title() -> string\n\
2219\n\
2220Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002221characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222
2223static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002224string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225{
2226 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002227 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002229 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002230
Anthony Baxtera6286212006-04-11 07:42:36 +00002231 newobj = PyString_FromStringAndSize(NULL, n);
2232 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002233 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002234 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002235 for (i = 0; i < n; i++) {
2236 int c = Py_CHARMASK(*s++);
2237 if (islower(c)) {
2238 if (!previous_is_cased)
2239 c = toupper(c);
2240 previous_is_cased = 1;
2241 } else if (isupper(c)) {
2242 if (previous_is_cased)
2243 c = tolower(c);
2244 previous_is_cased = 1;
2245 } else
2246 previous_is_cased = 0;
2247 *s_new++ = c;
2248 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002249 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002250}
2251
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002252PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253"S.capitalize() -> string\n\
2254\n\
2255Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002256capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257
2258static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002259string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260{
2261 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002262 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002263 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264
Anthony Baxtera6286212006-04-11 07:42:36 +00002265 newobj = PyString_FromStringAndSize(NULL, n);
2266 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002268 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269 if (0 < n) {
2270 int c = Py_CHARMASK(*s++);
2271 if (islower(c))
2272 *s_new = toupper(c);
2273 else
2274 *s_new = c;
2275 s_new++;
2276 }
2277 for (i = 1; i < n; i++) {
2278 int c = Py_CHARMASK(*s++);
2279 if (isupper(c))
2280 *s_new = tolower(c);
2281 else
2282 *s_new = c;
2283 s_new++;
2284 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002285 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286}
2287
2288
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002289PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290"S.count(sub[, start[, end]]) -> int\n\
2291\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002292Return the number of non-overlapping occurrences of substring sub in\n\
2293string S[start:end]. Optional arguments start and end are interpreted\n\
2294as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295
2296static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002297string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002299 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002300 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002301 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002302 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304
Guido van Rossumc6821402000-05-08 14:08:05 +00002305 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2306 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002308
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 if (PyString_Check(subobj)) {
2310 sub = PyString_AS_STRING(subobj);
2311 n = PyString_GET_SIZE(subobj);
2312 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002313#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002314 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002315 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002316 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2317 if (count == -1)
2318 return NULL;
2319 else
2320 return PyInt_FromLong((long) count);
2321 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002322#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002323 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2324 return NULL;
2325
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002326 string_adjust_indices(&i, &last, len);
2327
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328 m = last + 1 - n;
2329 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002330 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331
Fredrik Lundhaf722372006-05-25 17:55:31 +00002332#ifdef USE_FAST
2333 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2334 if (r < 0)
2335 r = 0; /* no match */
2336#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 r = 0;
2338 while (i < m) {
Fredrik Lundhaf722372006-05-25 17:55:31 +00002339 const char *t
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 if (!memcmp(s+i, sub, n)) {
2341 r++;
2342 i += n;
2343 } else {
2344 i++;
2345 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002346 if (i >= m)
2347 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002348 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002349 if (t == NULL)
2350 break;
2351 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002353#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002354 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355}
2356
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002357PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358"S.swapcase() -> string\n\
2359\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002361converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362
2363static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002364string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365{
2366 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002367 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002368 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369
Anthony Baxtera6286212006-04-11 07:42:36 +00002370 newobj = PyString_FromStringAndSize(NULL, n);
2371 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002373 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374 for (i = 0; i < n; i++) {
2375 int c = Py_CHARMASK(*s++);
2376 if (islower(c)) {
2377 *s_new = toupper(c);
2378 }
2379 else if (isupper(c)) {
2380 *s_new = tolower(c);
2381 }
2382 else
2383 *s_new = c;
2384 s_new++;
2385 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002386 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002387}
2388
2389
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002390PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391"S.translate(table [,deletechars]) -> string\n\
2392\n\
2393Return a copy of the string S, where all characters occurring\n\
2394in the optional argument deletechars are removed, and the\n\
2395remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002396translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002397
2398static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002399string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401 register char *input, *output;
2402 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002403 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002405 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002406 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407 PyObject *result;
2408 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002411 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002414
2415 if (PyString_Check(tableobj)) {
2416 table1 = PyString_AS_STRING(tableobj);
2417 tablen = PyString_GET_SIZE(tableobj);
2418 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002419#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002420 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002421 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002422 parameter; instead a mapping to None will cause characters
2423 to be deleted. */
2424 if (delobj != NULL) {
2425 PyErr_SetString(PyExc_TypeError,
2426 "deletions are implemented differently for unicode");
2427 return NULL;
2428 }
2429 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2430 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002431#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002434
Martin v. Löwis00b61272002-12-12 20:03:19 +00002435 if (tablen != 256) {
2436 PyErr_SetString(PyExc_ValueError,
2437 "translation table must be 256 characters long");
2438 return NULL;
2439 }
2440
Guido van Rossum4c08d552000-03-10 22:55:18 +00002441 if (delobj != NULL) {
2442 if (PyString_Check(delobj)) {
2443 del_table = PyString_AS_STRING(delobj);
2444 dellen = PyString_GET_SIZE(delobj);
2445 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002446#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447 else if (PyUnicode_Check(delobj)) {
2448 PyErr_SetString(PyExc_TypeError,
2449 "deletions are implemented differently for unicode");
2450 return NULL;
2451 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002452#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002453 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2454 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002455 }
2456 else {
2457 del_table = NULL;
2458 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002459 }
2460
2461 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002462 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463 result = PyString_FromStringAndSize((char *)NULL, inlen);
2464 if (result == NULL)
2465 return NULL;
2466 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002467 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002468
2469 if (dellen == 0) {
2470 /* If no deletions are required, use faster code */
2471 for (i = inlen; --i >= 0; ) {
2472 c = Py_CHARMASK(*input++);
2473 if (Py_CHARMASK((*output++ = table[c])) != c)
2474 changed = 1;
2475 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002476 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002477 return result;
2478 Py_DECREF(result);
2479 Py_INCREF(input_obj);
2480 return input_obj;
2481 }
2482
2483 for (i = 0; i < 256; i++)
2484 trans_table[i] = Py_CHARMASK(table[i]);
2485
2486 for (i = 0; i < dellen; i++)
2487 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2488
2489 for (i = inlen; --i >= 0; ) {
2490 c = Py_CHARMASK(*input++);
2491 if (trans_table[c] != -1)
2492 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2493 continue;
2494 changed = 1;
2495 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002496 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002497 Py_DECREF(result);
2498 Py_INCREF(input_obj);
2499 return input_obj;
2500 }
2501 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002502 if (inlen > 0)
2503 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002504 return result;
2505}
2506
2507
/* Search direction selectors for findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Py_STRING_MATCH(target, offset, pattern, length): true when the
   `length` bytes of `target` starting at `offset` equal `pattern`.
   The first and last bytes are compared directly before memcmp() is
   run on the bytes in between.

   Don't call if length < 2: the memcmp() size is length-2.

   Every parameter is parenthesised so that expression arguments
   (e.g. `p + 1`, `a ? b : c`) expand correctly.  Arguments are still
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
	((target)[(offset)] == (pattern)[0] &&				\
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&	\
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* findchar(target, target_len, c): pointer to the first byte equal to
   c within the first target_len bytes of target, or NULL if there is
   none (thin wrapper around memchr()). */
#define findchar(target, target_len, c)				\
	((char *)memchr((const void *)(target), (c), (target_len)))
2521
2522/* String ops must return a string. */
2523/* If the object is subclass of string, create a copy */
2524static PyStringObject *
2525return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002527 if (PyString_CheckExact(self)) {
2528 Py_INCREF(self);
2529 return self;
2530 }
2531 return (PyStringObject *)PyString_FromStringAndSize(
2532 PyString_AS_STRING(self),
2533 PyString_GET_SIZE(self));
2534}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002536static Py_ssize_t
2537countchar(char *target, int target_len, char c)
2538{
2539 Py_ssize_t count=0;
2540 char *start=target;
2541 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002542
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002543 while ( (start=findchar(start, end-start, c)) != NULL ) {
2544 count++;
2545 start += 1;
2546 }
2547
2548 return count;
2549}
2550
2551static Py_ssize_t
2552findstring(char *target, Py_ssize_t target_len,
2553 char *pattern, Py_ssize_t pattern_len,
2554 Py_ssize_t start,
2555 Py_ssize_t end,
2556 int direction)
2557{
2558 if (start < 0) {
2559 start += target_len;
2560 if (start < 0)
2561 start = 0;
2562 }
2563 if (end > target_len) {
2564 end = target_len;
2565 } else if (end < 0) {
2566 end += target_len;
2567 if (end < 0)
2568 end = 0;
2569 }
2570
2571 /* zero-length substrings always match at the first attempt */
2572 if (pattern_len == 0)
2573 return (direction > 0) ? start : end;
2574
2575 end -= pattern_len;
2576
2577 if (direction < 0) {
2578 for (; end >= start; end--)
2579 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2580 return end;
2581 } else {
2582 for (; start <= end; start++)
2583 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2584 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002585 }
2586 return -1;
2587}
2588
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002589Py_ssize_t
2590countstring(char *target, Py_ssize_t target_len,
2591 char *pattern, Py_ssize_t pattern_len,
2592 Py_ssize_t start,
2593 Py_ssize_t end,
2594 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002595{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002596 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002597
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002598 if (start < 0) {
2599 start += target_len;
2600 if (start < 0)
2601 start = 0;
2602 }
2603 if (end > target_len) {
2604 end = target_len;
2605 } else if (end < 0) {
2606 end += target_len;
2607 if (end < 0)
2608 end = 0;
2609 }
2610
2611 /* zero-length substrings match everywhere */
2612 if (pattern_len == 0)
2613 return target_len+1;
2614
2615 end -= pattern_len;
2616
2617 if (direction < 0) {
2618 for (; end >= start; end--)
2619 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2620 count++;
2621 end -= pattern_len-1;
2622 }
2623 } else {
2624 for (; start <= end; start++)
2625 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2626 count++;
2627 start += pattern_len-1;
2628 }
2629 }
2630 return count;
2631}
2632
2633
/* Algorithms for different cases of string replacement */
2635
2636/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2637static PyStringObject *
2638replace_interleave(PyStringObject *self,
2639 PyStringObject *to,
2640 Py_ssize_t maxcount)
2641{
2642 char *self_s, *to_s, *result_s;
2643 Py_ssize_t self_len, to_len, result_len;
2644 Py_ssize_t count, i, product;
2645 PyStringObject *result;
2646
2647 self_len = PyString_GET_SIZE(self);
2648 to_len = PyString_GET_SIZE(to);
2649
2650 /* 1 at the end plus 1 after every character */
2651 count = self_len+1;
2652 if (maxcount < count)
2653 count = maxcount;
2654
2655 /* Check for overflow */
2656 /* result_len = count * to_len + self_len; */
2657 product = count * to_len;
2658 if (product / to_len != count) {
2659 PyErr_SetString(PyExc_OverflowError,
2660 "replace string is too long");
2661 return NULL;
2662 }
2663 result_len = product + self_len;
2664 if (result_len < 0) {
2665 PyErr_SetString(PyExc_OverflowError,
2666 "replace string is too long");
2667 return NULL;
2668 }
2669
2670 if (! (result = (PyStringObject *)
2671 PyString_FromStringAndSize(NULL, result_len)) )
2672 return NULL;
2673
2674 self_s = PyString_AS_STRING(self);
2675 to_s = PyString_AS_STRING(to);
2676 to_len = PyString_GET_SIZE(to);
2677 result_s = PyString_AS_STRING(result);
2678
2679 /* TODO: special case single character, which doesn't need memcpy */
2680
2681 /* Lay the first one down (guaranteed this will occur) */
2682 memcpy(result_s, to_s, to_len);
2683 result_s += to_len;
2684 count -= 1;
2685
2686 for (i=0; i<count; i++) {
2687 *result_s++ = *self_s++;
2688 memcpy(result_s, to_s, to_len);
2689 result_s += to_len;
2690 }
2691
2692 /* Copy the rest of the original string */
2693 memcpy(result_s, self_s, self_len-i);
2694
2695 return result;
2696}
2697
2698/* Special case for deleting a single character */
2699/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2700static PyStringObject *
2701replace_delete_single_character(PyStringObject *self,
2702 char from_c, Py_ssize_t maxcount)
2703{
2704 char *self_s, *result_s;
2705 char *start, *next, *end;
2706 Py_ssize_t self_len, result_len;
2707 Py_ssize_t count;
2708 PyStringObject *result;
2709
2710 self_len = PyString_GET_SIZE(self);
2711 self_s = PyString_AS_STRING(self);
2712
2713 count = countchar(self_s, self_len, from_c);
2714 if (count == 0) {
2715 return return_self(self);
2716 }
2717 if (count > maxcount)
2718 count = maxcount;
2719
2720 result_len = self_len - count; /* from_len == 1 */
2721 assert(result_len>=0);
2722
2723 if ( (result = (PyStringObject *)
2724 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2725 return NULL;
2726 result_s = PyString_AS_STRING(result);
2727
2728 start = self_s;
2729 end = self_s + self_len;
2730 while (count-- > 0) {
2731 next = findchar(start, end-start, from_c);
2732 if (next == NULL)
2733 break;
2734 memcpy(result_s, start, next-start);
2735 result_s += (next-start);
2736 start = next+1;
2737 }
2738 memcpy(result_s, start, end-start);
2739
2740 return result;
2741}
2742
2743/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2744
2745static PyStringObject *
2746replace_delete_substring(PyStringObject *self, PyStringObject *from,
2747 Py_ssize_t maxcount) {
2748 char *self_s, *from_s, *result_s;
2749 char *start, *next, *end;
2750 Py_ssize_t self_len, from_len, result_len;
2751 Py_ssize_t count, offset;
2752 PyStringObject *result;
2753
2754 self_len = PyString_GET_SIZE(self);
2755 self_s = PyString_AS_STRING(self);
2756 from_len = PyString_GET_SIZE(from);
2757 from_s = PyString_AS_STRING(from);
2758
2759 count = countstring(self_s, self_len,
2760 from_s, from_len,
2761 0, self_len, 1);
2762
2763 if (count > maxcount)
2764 count = maxcount;
2765
2766 if (count == 0) {
2767 /* no matches */
2768 return return_self(self);
2769 }
2770
2771 result_len = self_len - (count * from_len);
2772 assert (result_len>=0);
2773
2774 if ( (result = (PyStringObject *)
2775 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2776 return NULL;
2777
2778 result_s = PyString_AS_STRING(result);
2779
2780 start = self_s;
2781 end = self_s + self_len;
2782 while (count-- > 0) {
2783 offset = findstring(start, end-start,
2784 from_s, from_len,
2785 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002786 if (offset == -1)
2787 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 next = start + offset;
2789
2790 memcpy(result_s, start, next-start);
2791
2792 result_s += (next-start);
2793 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002794 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002795 memcpy(result_s, start, end-start);
2796 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002797}
2798
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2800static PyStringObject *
2801replace_single_character_in_place(PyStringObject *self,
2802 char from_c, char to_c,
2803 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002804{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 char *self_s, *result_s, *start, *end, *next;
2806 Py_ssize_t self_len;
2807 PyStringObject *result;
2808
2809 /* The result string will be the same size */
2810 self_s = PyString_AS_STRING(self);
2811 self_len = PyString_GET_SIZE(self);
2812
2813 next = findchar(self_s, self_len, from_c);
2814
2815 if (next == NULL) {
2816 /* No matches; return the original string */
2817 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002818 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819
2820 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002821 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822 if (result == NULL)
2823 return NULL;
2824 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002825 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002826
2827 /* change everything in-place, starting with this one */
2828 start = result_s + (next-self_s);
2829 *start = to_c;
2830 start++;
2831 end = result_s + self_len;
2832
2833 while (--maxcount > 0) {
2834 next = findchar(start, end-start, from_c);
2835 if (next == NULL)
2836 break;
2837 *next = to_c;
2838 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002839 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002840
2841 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002842}
2843
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002844/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2845static PyStringObject *
2846replace_substring_in_place(PyStringObject *self,
2847 PyStringObject *from,
2848 PyStringObject *to,
2849 Py_ssize_t maxcount)
2850{
2851 char *result_s, *start, *end;
2852 char *self_s, *from_s, *to_s;
2853 Py_ssize_t self_len, from_len, offset;
2854 PyStringObject *result;
2855
2856 /* The result string will be the same size */
2857
2858 self_s = PyString_AS_STRING(self);
2859 self_len = PyString_GET_SIZE(self);
2860
2861 from_s = PyString_AS_STRING(from);
2862 from_len = PyString_GET_SIZE(from);
2863 to_s = PyString_AS_STRING(to);
2864
2865 offset = findstring(self_s, self_len,
2866 from_s, from_len,
2867 0, self_len, FORWARD);
2868
2869 if (offset == -1) {
2870 /* No matches; return the original string */
2871 return return_self(self);
2872 }
2873
2874 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002875 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002876 if (result == NULL)
2877 return NULL;
2878 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002879 memcpy(result_s, self_s, self_len);
2880
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881
2882 /* change everything in-place, starting with this one */
2883 start = result_s + offset;
2884 memcpy(start, to_s, from_len);
2885 start += from_len;
2886 end = result_s + self_len;
2887
2888 while ( --maxcount > 0) {
2889 offset = findstring(start, end-start,
2890 from_s, from_len,
2891 0, end-start, FORWARD);
2892 if (offset==-1)
2893 break;
2894 memcpy(start+offset, to_s, from_len);
2895 start += offset+from_len;
2896 }
2897
2898 return result;
2899}
2900
2901/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2902static PyStringObject *
2903replace_single_character(PyStringObject *self,
2904 char from_c,
2905 PyStringObject *to,
2906 Py_ssize_t maxcount)
2907{
2908 char *self_s, *to_s, *result_s;
2909 char *start, *next, *end;
2910 Py_ssize_t self_len, to_len, result_len;
2911 Py_ssize_t count, product;
2912 PyStringObject *result;
2913
2914 self_s = PyString_AS_STRING(self);
2915 self_len = PyString_GET_SIZE(self);
2916
2917 count = countchar(self_s, self_len, from_c);
2918 if (count > maxcount)
2919 count = maxcount;
2920
2921 if (count == 0) {
2922 /* no matches, return unchanged */
2923 return return_self(self);
2924 }
2925
2926 to_s = PyString_AS_STRING(to);
2927 to_len = PyString_GET_SIZE(to);
2928
2929 /* use the difference between current and new, hence the "-1" */
2930 /* result_len = self_len + count * (to_len-1) */
2931 product = count * (to_len-1);
2932 if (product / (to_len-1) != count) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
2936 result_len = self_len + product;
2937 if (result_len < 0) {
2938 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2939 return NULL;
2940 }
2941
2942 if ( (result = (PyStringObject *)
2943 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2944 return NULL;
2945 result_s = PyString_AS_STRING(result);
2946
2947 start = self_s;
2948 end = self_s + self_len;
2949 while (count-- > 0) {
2950 next = findchar(start, end-start, from_c);
2951 if (next == NULL)
2952 break;
2953
2954 if (next == start) {
2955 /* replace with the 'to' */
2956 memcpy(result_s, to_s, to_len);
2957 result_s += to_len;
2958 start += 1;
2959 } else {
2960 /* copy the unchanged old then the 'to' */
2961 memcpy(result_s, start, next-start);
2962 result_s += (next-start);
2963 memcpy(result_s, to_s, to_len);
2964 result_s += to_len;
2965 start = next+1;
2966 }
2967 }
2968 /* Copy the remainder of the remaining string */
2969 memcpy(result_s, start, end-start);
2970
2971 return result;
2972}
2973
2974/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2975static PyStringObject *
2976replace_substring(PyStringObject *self,
2977 PyStringObject *from,
2978 PyStringObject *to,
2979 Py_ssize_t maxcount) {
2980 char *self_s, *from_s, *to_s, *result_s;
2981 char *start, *next, *end;
2982 Py_ssize_t self_len, from_len, to_len, result_len;
2983 Py_ssize_t count, offset, product;
2984 PyStringObject *result;
2985
2986 self_s = PyString_AS_STRING(self);
2987 self_len = PyString_GET_SIZE(self);
2988 from_s = PyString_AS_STRING(from);
2989 from_len = PyString_GET_SIZE(from);
2990
2991 count = countstring(self_s, self_len,
2992 from_s, from_len,
2993 0, self_len, FORWARD);
2994 if (count > maxcount)
2995 count = maxcount;
2996
2997 if (count == 0) {
2998 /* no matches, return unchanged */
2999 return return_self(self);
3000 }
3001
3002 to_s = PyString_AS_STRING(to);
3003 to_len = PyString_GET_SIZE(to);
3004
3005 /* Check for overflow */
3006 /* result_len = self_len + count * (to_len-from_len) */
3007 product = count * (to_len-from_len);
3008 if (product / (to_len-from_len) != count) {
3009 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010 return NULL;
3011 }
3012 result_len = self_len + product;
3013 if (result_len < 0) {
3014 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3015 return NULL;
3016 }
3017
3018 if ( (result = (PyStringObject *)
3019 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3020 return NULL;
3021 result_s = PyString_AS_STRING(result);
3022
3023 start = self_s;
3024 end = self_s + self_len;
3025 while (count-- > 0) {
3026 offset = findstring(start, end-start,
3027 from_s, from_len,
3028 0, end-start, FORWARD);
3029 if (offset == -1)
3030 break;
3031 next = start+offset;
3032 if (next == start) {
3033 /* replace with the 'to' */
3034 memcpy(result_s, to_s, to_len);
3035 result_s += to_len;
3036 start += from_len;
3037 } else {
3038 /* copy the unchanged old then the 'to' */
3039 memcpy(result_s, start, next-start);
3040 result_s += (next-start);
3041 memcpy(result_s, to_s, to_len);
3042 result_s += to_len;
3043 start = next+from_len;
3044 }
3045 }
3046 /* Copy the remainder of the remaining string */
3047 memcpy(result_s, start, end-start);
3048
3049 return result;
3050}
3051
3052
3053static PyStringObject *
3054replace(PyStringObject *self,
3055 PyStringObject *from,
3056 PyStringObject *to,
3057 Py_ssize_t maxcount)
3058{
3059 Py_ssize_t from_len, to_len;
3060
3061 if (maxcount < 0) {
3062 maxcount = PY_SSIZE_T_MAX;
3063 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3064 /* nothing to do; return the original string */
3065 return return_self(self);
3066 }
3067
3068 from_len = PyString_GET_SIZE(from);
3069 to_len = PyString_GET_SIZE(to);
3070
3071 if (maxcount == 0 ||
3072 (from_len == 0 && to_len == 0)) {
3073 /* nothing to do; return the original string */
3074 return return_self(self);
3075 }
3076
3077 /* Handle zero-length special cases */
3078
3079 if (from_len == 0) {
3080 /* insert the 'to' string everywhere. */
3081 /* >>> "Python".replace("", ".") */
3082 /* '.P.y.t.h.o.n.' */
3083 return replace_interleave(self, to, maxcount);
3084 }
3085
3086 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3087 /* point for an empty self string to generate a non-empty string */
3088 /* Special case so the remaining code always gets a non-empty string */
3089 if (PyString_GET_SIZE(self) == 0) {
3090 return return_self(self);
3091 }
3092
3093 if (to_len == 0) {
3094 /* delete all occurances of 'from' string */
3095 if (from_len == 1) {
3096 return replace_delete_single_character(
3097 self, PyString_AS_STRING(from)[0], maxcount);
3098 } else {
3099 return replace_delete_substring(self, from, maxcount);
3100 }
3101 }
3102
3103 /* Handle special case where both strings have the same length */
3104
3105 if (from_len == to_len) {
3106 if (from_len == 1) {
3107 return replace_single_character_in_place(
3108 self,
3109 PyString_AS_STRING(from)[0],
3110 PyString_AS_STRING(to)[0],
3111 maxcount);
3112 } else {
3113 return replace_substring_in_place(
3114 self, from, to, maxcount);
3115 }
3116 }
3117
3118 /* Otherwise use the more generic algorithms */
3119 if (from_len == 1) {
3120 return replace_single_character(self, PyString_AS_STRING(from)[0],
3121 to, maxcount);
3122 } else {
3123 /* len('from')>=2, len('to')>=1 */
3124 return replace_substring(self, from, to, maxcount);
3125 }
3126}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003128PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003129"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003130\n\
3131Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003132old replaced by new. If the optional argument count is\n\
3133given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003134
3135static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003136string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003138 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003139 PyObject *from, *to;
3140 char *tmp_s;
3141 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003142
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003143 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003146 if (PyString_Check(from)) {
3147 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003148 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003149#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003150 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003151 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003152 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003153#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003154 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 return NULL;
3156
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003157 if (PyString_Check(to)) {
3158 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003160#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003161 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003162 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003163 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003164#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003165 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003166 return NULL;
3167
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003168 return (PyObject *)replace((PyStringObject *) self,
3169 (PyStringObject *) from,
3170 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003171}
3172
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003173/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003174
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003175PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003176"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003177\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003178Return True if S starts with the specified prefix, False otherwise.\n\
3179With optional start, test S beginning at that position.\n\
3180With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003181
3182static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003183string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003184{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003185 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003186 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003187 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003188 Py_ssize_t plen;
3189 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003190 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003192
Guido van Rossumc6821402000-05-08 14:08:05 +00003193 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3194 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195 return NULL;
3196 if (PyString_Check(subobj)) {
3197 prefix = PyString_AS_STRING(subobj);
3198 plen = PyString_GET_SIZE(subobj);
3199 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003200#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003201 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003202 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003203 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003204 subobj, start, end, -1);
3205 if (rc == -1)
3206 return NULL;
3207 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003208 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003209 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003210#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003211 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003212 return NULL;
3213
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003214 string_adjust_indices(&start, &end, len);
3215
3216 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003217 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003218
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003219 if (end-start >= plen)
3220 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3221 else
3222 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003223}
3224
3225
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003226PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003227"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003228\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003229Return True if S ends with the specified suffix, False otherwise.\n\
3230With optional start, test S beginning at that position.\n\
3231With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003232
3233static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003234string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003235{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003236 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003237 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003238 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003239 Py_ssize_t slen;
3240 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003241 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003242 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003243
Guido van Rossumc6821402000-05-08 14:08:05 +00003244 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3245 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003246 return NULL;
3247 if (PyString_Check(subobj)) {
3248 suffix = PyString_AS_STRING(subobj);
3249 slen = PyString_GET_SIZE(subobj);
3250 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003251#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003252 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003253 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003254 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003255 subobj, start, end, +1);
3256 if (rc == -1)
3257 return NULL;
3258 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003259 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003260 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003261#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003262 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003263 return NULL;
3264
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003265 string_adjust_indices(&start, &end, len);
3266
3267 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003268 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003269
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003270 if (end-slen > start)
3271 start = end - slen;
3272 if (end-start >= slen)
3273 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3274 else
3275 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003276}
3277
3278
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003279PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003280"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003281\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003282Encodes S using the codec registered for encoding. encoding defaults\n\
3283to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003284handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003285a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3286'xmlcharrefreplace' as well as any other name registered with\n\
3287codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003288
3289static PyObject *
3290string_encode(PyStringObject *self, PyObject *args)
3291{
3292 char *encoding = NULL;
3293 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003294 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003295
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003296 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3297 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003298 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003299 if (v == NULL)
3300 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003301 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3302 PyErr_Format(PyExc_TypeError,
3303 "encoder did not return a string/unicode object "
3304 "(type=%.400s)",
3305 v->ob_type->tp_name);
3306 Py_DECREF(v);
3307 return NULL;
3308 }
3309 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003310
3311 onError:
3312 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003313}
3314
3315
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003316PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003317"S.decode([encoding[,errors]]) -> object\n\
3318\n\
3319Decodes S using the codec registered for encoding. encoding defaults\n\
3320to the default encoding. errors may be given to set a different error\n\
3321handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003322a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3323as well as any other name registerd with codecs.register_error that is\n\
3324able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003325
3326static PyObject *
3327string_decode(PyStringObject *self, PyObject *args)
3328{
3329 char *encoding = NULL;
3330 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003331 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003332
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003333 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3334 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003335 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003336 if (v == NULL)
3337 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003338 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3339 PyErr_Format(PyExc_TypeError,
3340 "decoder did not return a string/unicode object "
3341 "(type=%.400s)",
3342 v->ob_type->tp_name);
3343 Py_DECREF(v);
3344 return NULL;
3345 }
3346 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003347
3348 onError:
3349 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003350}
3351
3352
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003353PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003354"S.expandtabs([tabsize]) -> string\n\
3355\n\
3356Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003357If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003358
3359static PyObject*
3360string_expandtabs(PyStringObject *self, PyObject *args)
3361{
3362 const char *e, *p;
3363 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003364 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003365 PyObject *u;
3366 int tabsize = 8;
3367
3368 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3369 return NULL;
3370
Thomas Wouters7e474022000-07-16 12:04:32 +00003371 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003372 i = j = 0;
3373 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3374 for (p = PyString_AS_STRING(self); p < e; p++)
3375 if (*p == '\t') {
3376 if (tabsize > 0)
3377 j += tabsize - (j % tabsize);
3378 }
3379 else {
3380 j++;
3381 if (*p == '\n' || *p == '\r') {
3382 i += j;
3383 j = 0;
3384 }
3385 }
3386
3387 /* Second pass: create output string and fill it */
3388 u = PyString_FromStringAndSize(NULL, i + j);
3389 if (!u)
3390 return NULL;
3391
3392 j = 0;
3393 q = PyString_AS_STRING(u);
3394
3395 for (p = PyString_AS_STRING(self); p < e; p++)
3396 if (*p == '\t') {
3397 if (tabsize > 0) {
3398 i = tabsize - (j % tabsize);
3399 j += i;
3400 while (i--)
3401 *q++ = ' ';
3402 }
3403 }
3404 else {
3405 j++;
3406 *q++ = *p;
3407 if (*p == '\n' || *p == '\r')
3408 j = 0;
3409 }
3410
3411 return u;
3412}
3413
Tim Peters8fa5dd02001-09-12 02:18:30 +00003414static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003415pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003416{
3417 PyObject *u;
3418
3419 if (left < 0)
3420 left = 0;
3421 if (right < 0)
3422 right = 0;
3423
Tim Peters8fa5dd02001-09-12 02:18:30 +00003424 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425 Py_INCREF(self);
3426 return (PyObject *)self;
3427 }
3428
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003429 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430 left + PyString_GET_SIZE(self) + right);
3431 if (u) {
3432 if (left)
3433 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003434 memcpy(PyString_AS_STRING(u) + left,
3435 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003436 PyString_GET_SIZE(self));
3437 if (right)
3438 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3439 fill, right);
3440 }
3441
3442 return u;
3443}
3444
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003445PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003446"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003447"\n"
3448"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003449"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003450
3451static PyObject *
3452string_ljust(PyStringObject *self, PyObject *args)
3453{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003454 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003455 char fillchar = ' ';
3456
Thomas Wouters4abb3662006-04-19 14:50:15 +00003457 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458 return NULL;
3459
Tim Peters8fa5dd02001-09-12 02:18:30 +00003460 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461 Py_INCREF(self);
3462 return (PyObject*) self;
3463 }
3464
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003465 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466}
3467
3468
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003469PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003470"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003471"\n"
3472"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003473"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474
3475static PyObject *
3476string_rjust(PyStringObject *self, PyObject *args)
3477{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003478 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003479 char fillchar = ' ';
3480
Thomas Wouters4abb3662006-04-19 14:50:15 +00003481 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003482 return NULL;
3483
Tim Peters8fa5dd02001-09-12 02:18:30 +00003484 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003485 Py_INCREF(self);
3486 return (PyObject*) self;
3487 }
3488
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003489 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003490}
3491
3492
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003493PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003494"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003495"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003496"Return S centered in a string of length width. Padding is\n"
3497"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003498
3499static PyObject *
3500string_center(PyStringObject *self, PyObject *args)
3501{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003502 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003503 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003504 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505
Thomas Wouters4abb3662006-04-19 14:50:15 +00003506 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003507 return NULL;
3508
Tim Peters8fa5dd02001-09-12 02:18:30 +00003509 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003510 Py_INCREF(self);
3511 return (PyObject*) self;
3512 }
3513
3514 marg = width - PyString_GET_SIZE(self);
3515 left = marg / 2 + (marg & width & 1);
3516
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003517 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518}
3519
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003520PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003521"S.zfill(width) -> string\n"
3522"\n"
3523"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003524"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003525
3526static PyObject *
3527string_zfill(PyStringObject *self, PyObject *args)
3528{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003529 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003530 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003531 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003532 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003533
Thomas Wouters4abb3662006-04-19 14:50:15 +00003534 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003535 return NULL;
3536
3537 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003538 if (PyString_CheckExact(self)) {
3539 Py_INCREF(self);
3540 return (PyObject*) self;
3541 }
3542 else
3543 return PyString_FromStringAndSize(
3544 PyString_AS_STRING(self),
3545 PyString_GET_SIZE(self)
3546 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003547 }
3548
3549 fill = width - PyString_GET_SIZE(self);
3550
3551 s = pad(self, fill, 0, '0');
3552
3553 if (s == NULL)
3554 return NULL;
3555
3556 p = PyString_AS_STRING(s);
3557 if (p[fill] == '+' || p[fill] == '-') {
3558 /* move sign to beginning of string */
3559 p[0] = p[fill];
3560 p[fill] = '0';
3561 }
3562
3563 return (PyObject*) s;
3564}
3565
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003566PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003567"S.isspace() -> bool\n\
3568\n\
3569Return True if all characters in S are whitespace\n\
3570and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003571
3572static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003573string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003574{
Fred Drakeba096332000-07-09 07:04:36 +00003575 register const unsigned char *p
3576 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003577 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003578
Guido van Rossum4c08d552000-03-10 22:55:18 +00003579 /* Shortcut for single character strings */
3580 if (PyString_GET_SIZE(self) == 1 &&
3581 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003583
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003584 /* Special case for empty strings */
3585 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003587
Guido van Rossum4c08d552000-03-10 22:55:18 +00003588 e = p + PyString_GET_SIZE(self);
3589 for (; p < e; p++) {
3590 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003591 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003592 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003594}
3595
3596
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003597PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003598"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003599\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003600Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003601and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003602
3603static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003604string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605{
Fred Drakeba096332000-07-09 07:04:36 +00003606 register const unsigned char *p
3607 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003608 register const unsigned char *e;
3609
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003610 /* Shortcut for single character strings */
3611 if (PyString_GET_SIZE(self) == 1 &&
3612 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003614
3615 /* Special case for empty strings */
3616 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003618
3619 e = p + PyString_GET_SIZE(self);
3620 for (; p < e; p++) {
3621 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003622 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003623 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003625}
3626
3627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003628PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003629"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003630\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003631Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003632and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003633
3634static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003635string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003636{
Fred Drakeba096332000-07-09 07:04:36 +00003637 register const unsigned char *p
3638 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003639 register const unsigned char *e;
3640
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003641 /* Shortcut for single character strings */
3642 if (PyString_GET_SIZE(self) == 1 &&
3643 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003645
3646 /* Special case for empty strings */
3647 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003649
3650 e = p + PyString_GET_SIZE(self);
3651 for (; p < e; p++) {
3652 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003654 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003655 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003656}
3657
3658
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003659PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003662Return True if all characters in S are digits\n\
3663and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664
3665static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003666string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667{
Fred Drakeba096332000-07-09 07:04:36 +00003668 register const unsigned char *p
3669 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003670 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672 /* Shortcut for single character strings */
3673 if (PyString_GET_SIZE(self) == 1 &&
3674 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003677 /* Special case for empty strings */
3678 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003680
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681 e = p + PyString_GET_SIZE(self);
3682 for (; p < e; p++) {
3683 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003686 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687}
3688
3689
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003690PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003691"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003693Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003694at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695
3696static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003697string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698{
Fred Drakeba096332000-07-09 07:04:36 +00003699 register const unsigned char *p
3700 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003701 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 int cased;
3703
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704 /* Shortcut for single character strings */
3705 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003708 /* Special case for empty strings */
3709 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003711
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 e = p + PyString_GET_SIZE(self);
3713 cased = 0;
3714 for (; p < e; p++) {
3715 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717 else if (!cased && islower(*p))
3718 cased = 1;
3719 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721}
3722
3723
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003724PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003725"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003727Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003728at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729
3730static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003731string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003732{
Fred Drakeba096332000-07-09 07:04:36 +00003733 register const unsigned char *p
3734 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003735 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736 int cased;
3737
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738 /* Shortcut for single character strings */
3739 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003740 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003742 /* Special case for empty strings */
3743 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003744 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003745
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 e = p + PyString_GET_SIZE(self);
3747 cased = 0;
3748 for (; p < e; p++) {
3749 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003750 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751 else if (!cased && isupper(*p))
3752 cased = 1;
3753 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755}
3756
3757
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003758PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003761Return True if S is a titlecased string and there is at least one\n\
3762character in S, i.e. uppercase characters may only follow uncased\n\
3763characters and lowercase characters only cased ones. Return False\n\
3764otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765
3766static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003767string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003768{
Fred Drakeba096332000-07-09 07:04:36 +00003769 register const unsigned char *p
3770 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003771 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772 int cased, previous_is_cased;
3773
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774 /* Shortcut for single character strings */
3775 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003776 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003778 /* Special case for empty strings */
3779 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003780 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003781
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782 e = p + PyString_GET_SIZE(self);
3783 cased = 0;
3784 previous_is_cased = 0;
3785 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003786 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787
3788 if (isupper(ch)) {
3789 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003790 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003791 previous_is_cased = 1;
3792 cased = 1;
3793 }
3794 else if (islower(ch)) {
3795 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003796 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797 previous_is_cased = 1;
3798 cased = 1;
3799 }
3800 else
3801 previous_is_cased = 0;
3802 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003803 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804}
3805
3806
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003807PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003808"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809\n\
3810Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003811Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003812is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813
Guido van Rossum4c08d552000-03-10 22:55:18 +00003814static PyObject*
3815string_splitlines(PyStringObject *self, PyObject *args)
3816{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003817 register Py_ssize_t i;
3818 register Py_ssize_t j;
3819 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003820 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 PyObject *list;
3822 PyObject *str;
3823 char *data;
3824
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003825 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 return NULL;
3827
3828 data = PyString_AS_STRING(self);
3829 len = PyString_GET_SIZE(self);
3830
Guido van Rossum4c08d552000-03-10 22:55:18 +00003831 list = PyList_New(0);
3832 if (!list)
3833 goto onError;
3834
3835 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003836 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003837
Guido van Rossum4c08d552000-03-10 22:55:18 +00003838 /* Find a line and append it */
3839 while (i < len && data[i] != '\n' && data[i] != '\r')
3840 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003841
3842 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003843 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003844 if (i < len) {
3845 if (data[i] == '\r' && i + 1 < len &&
3846 data[i+1] == '\n')
3847 i += 2;
3848 else
3849 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003850 if (keepends)
3851 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003852 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003853 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003854 j = i;
3855 }
3856 if (j < len) {
3857 SPLIT_APPEND(data, j, len);
3858 }
3859
3860 return list;
3861
3862 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003863 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003864 return NULL;
3865}
3866
3867#undef SPLIT_APPEND
3868
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003869static PyObject *
3870string_getnewargs(PyStringObject *v)
3871{
3872 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3873}
3874
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003875
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003876static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003877string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003878 /* Counterparts of the obsolete stropmodule functions; except
3879 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003880 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3881 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003882 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003883 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3884 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003885 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3886 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3887 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3888 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3889 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3890 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3891 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003892 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3893 capitalize__doc__},
3894 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3895 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3896 endswith__doc__},
3897 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3898 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3899 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3900 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3901 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3902 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3903 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3904 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3905 startswith__doc__},
3906 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3907 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3908 swapcase__doc__},
3909 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3910 translate__doc__},
3911 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3912 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3913 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3914 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3915 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3916 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3917 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3918 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3919 expandtabs__doc__},
3920 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3921 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003922 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003923 {NULL, NULL} /* sentinel */
3924};
3925
Jeremy Hylton938ace62002-07-17 16:30:39 +00003926static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003927str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3928
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003929static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003930string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003931{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003932 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003933 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003934
Guido van Rossumae960af2001-08-30 03:11:59 +00003935 if (type != &PyString_Type)
3936 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003937 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3938 return NULL;
3939 if (x == NULL)
3940 return PyString_FromString("");
3941 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003942}
3943
Guido van Rossumae960af2001-08-30 03:11:59 +00003944static PyObject *
3945str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3946{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003947 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003948 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003949
3950 assert(PyType_IsSubtype(type, &PyString_Type));
3951 tmp = string_new(&PyString_Type, args, kwds);
3952 if (tmp == NULL)
3953 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003954 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003955 n = PyString_GET_SIZE(tmp);
3956 pnew = type->tp_alloc(type, n);
3957 if (pnew != NULL) {
3958 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003959 ((PyStringObject *)pnew)->ob_shash =
3960 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003961 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003962 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003963 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003964 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003965}
3966
Guido van Rossumcacfc072002-05-24 19:01:59 +00003967static PyObject *
3968basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3969{
3970 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003971 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003972 return NULL;
3973}
3974
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003975static PyObject *
3976string_mod(PyObject *v, PyObject *w)
3977{
3978 if (!PyString_Check(v)) {
3979 Py_INCREF(Py_NotImplemented);
3980 return Py_NotImplemented;
3981 }
3982 return PyString_Format(v, w);
3983}
3984
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003985PyDoc_STRVAR(basestring_doc,
3986"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003987
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003988static PyNumberMethods string_as_number = {
3989 0, /*nb_add*/
3990 0, /*nb_subtract*/
3991 0, /*nb_multiply*/
3992 0, /*nb_divide*/
3993 string_mod, /*nb_remainder*/
3994};
3995
3996
Guido van Rossumcacfc072002-05-24 19:01:59 +00003997PyTypeObject PyBaseString_Type = {
3998 PyObject_HEAD_INIT(&PyType_Type)
3999 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004000 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004001 0,
4002 0,
4003 0, /* tp_dealloc */
4004 0, /* tp_print */
4005 0, /* tp_getattr */
4006 0, /* tp_setattr */
4007 0, /* tp_compare */
4008 0, /* tp_repr */
4009 0, /* tp_as_number */
4010 0, /* tp_as_sequence */
4011 0, /* tp_as_mapping */
4012 0, /* tp_hash */
4013 0, /* tp_call */
4014 0, /* tp_str */
4015 0, /* tp_getattro */
4016 0, /* tp_setattro */
4017 0, /* tp_as_buffer */
4018 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4019 basestring_doc, /* tp_doc */
4020 0, /* tp_traverse */
4021 0, /* tp_clear */
4022 0, /* tp_richcompare */
4023 0, /* tp_weaklistoffset */
4024 0, /* tp_iter */
4025 0, /* tp_iternext */
4026 0, /* tp_methods */
4027 0, /* tp_members */
4028 0, /* tp_getset */
4029 &PyBaseObject_Type, /* tp_base */
4030 0, /* tp_dict */
4031 0, /* tp_descr_get */
4032 0, /* tp_descr_set */
4033 0, /* tp_dictoffset */
4034 0, /* tp_init */
4035 0, /* tp_alloc */
4036 basestring_new, /* tp_new */
4037 0, /* tp_free */
4038};
4039
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004040PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004041"str(object) -> string\n\
4042\n\
4043Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004044If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004045
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046PyTypeObject PyString_Type = {
4047 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004048 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004049 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004050 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004051 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004052 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004053 (printfunc)string_print, /* tp_print */
4054 0, /* tp_getattr */
4055 0, /* tp_setattr */
4056 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004057 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004058 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004059 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004060 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004061 (hashfunc)string_hash, /* tp_hash */
4062 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004063 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004064 PyObject_GenericGetAttr, /* tp_getattro */
4065 0, /* tp_setattro */
4066 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004067 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004068 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004069 string_doc, /* tp_doc */
4070 0, /* tp_traverse */
4071 0, /* tp_clear */
4072 (richcmpfunc)string_richcompare, /* tp_richcompare */
4073 0, /* tp_weaklistoffset */
4074 0, /* tp_iter */
4075 0, /* tp_iternext */
4076 string_methods, /* tp_methods */
4077 0, /* tp_members */
4078 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004079 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004080 0, /* tp_dict */
4081 0, /* tp_descr_get */
4082 0, /* tp_descr_set */
4083 0, /* tp_dictoffset */
4084 0, /* tp_init */
4085 0, /* tp_alloc */
4086 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004087 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004088};
4089
4090void
Fred Drakeba096332000-07-09 07:04:36 +00004091PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004093 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004094 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004095 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004096 if (w == NULL || !PyString_Check(*pv)) {
4097 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004098 *pv = NULL;
4099 return;
4100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004101 v = string_concat((PyStringObject *) *pv, w);
4102 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004103 *pv = v;
4104}
4105
Guido van Rossum013142a1994-08-30 08:19:36 +00004106void
Fred Drakeba096332000-07-09 07:04:36 +00004107PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004108{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004109 PyString_Concat(pv, w);
4110 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004111}
4112
4113
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004114/* The following function breaks the notion that strings are immutable:
4115 it changes the size of a string. We get away with this only if there
4116 is only one module referencing the object. You can also think of it
4117 as creating a new string object and destroying the old one, only
4118 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004119 already be known to some other part of the code...
4120 Note that if there's not enough memory to resize the string, the original
4121 string object at *pv is deallocated, *pv is set to NULL, an "out of
4122 memory" exception is set, and -1 is returned. Else (on success) 0 is
4123 returned, and the value in *pv may or may not be the same as on input.
4124 As always, an extra byte is allocated for a trailing \0 byte (newsize
4125 does *not* include that), and a trailing \0 byte is stored.
4126*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004127
4128int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004129_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004130{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004131 register PyObject *v;
4132 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004133 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004134 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4135 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004136 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004137 Py_DECREF(v);
4138 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004139 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004140 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004141 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004142 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004143 _Py_ForgetReference(v);
4144 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004145 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004146 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004147 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004148 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004149 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004151 _Py_NewReference(*pv);
4152 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004153 sv->ob_size = newsize;
4154 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004155 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004156 return 0;
4157}
Guido van Rossume5372401993-03-16 12:15:04 +00004158
4159/* Helpers for formatstring */
4160
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004161static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004162getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004163{
Thomas Wouters977485d2006-02-16 15:59:12 +00004164 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004165 if (argidx < arglen) {
4166 (*p_argidx)++;
4167 if (arglen < 0)
4168 return args;
4169 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004170 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004171 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004172 PyErr_SetString(PyExc_TypeError,
4173 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004174 return NULL;
4175}
4176
/* Format flag bits, one per flag character of a conversion specifier:
 *
 *   flag character   macro
 *   '-'              F_LJUST
 *   '+'              F_SIGN
 *   ' '              F_BLANK
 *   '#'              F_ALT
 *   '0'              F_ZERO
 */
#define F_LJUST	(1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK	(1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
4189
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004190static int
Fred Drakeba096332000-07-09 07:04:36 +00004191formatfloat(char *buf, size_t buflen, int flags,
4192 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004193{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004194 /* fmt = '%#.' + `prec` + `type`
4195 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004196 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004197 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004198 x = PyFloat_AsDouble(v);
4199 if (x == -1.0 && PyErr_Occurred()) {
4200 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004201 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004202 }
Guido van Rossume5372401993-03-16 12:15:04 +00004203 if (prec < 0)
4204 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004205 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4206 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004207 /* Worst case length calc to ensure no buffer overrun:
4208
4209 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004210 fmt = %#.<prec>g
4211 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004212 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004213 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004214
4215 'f' formats:
4216 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4217 len = 1 + 50 + 1 + prec = 52 + prec
4218
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004219 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004220 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004221
4222 */
4223 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4224 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004225 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004226 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004227 return -1;
4228 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004229 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4230 (flags&F_ALT) ? "#" : "",
4231 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004232 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004233 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004234}
4235
Tim Peters38fd5b62000-09-21 05:43:11 +00004236/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4237 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4238 * Python's regular ints.
4239 * Return value: a new PyString*, or NULL if error.
4240 * . *pbuf is set to point into it,
4241 * *plen set to the # of chars following that.
4242 * Caller must decref it when done using pbuf.
4243 * The string starting at *pbuf is of the form
4244 * "-"? ("0x" | "0X")? digit+
4245 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004246 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004247 * There will be at least prec digits, zero-filled on the left if
4248 * necessary to get that many.
4249 * val object to be converted
4250 * flags bitmask of format flags; only F_ALT is looked at
4251 * prec minimum number of digits; 0-fill on left if needed
4252 * type a character in [duoxX]; u acts the same as d
4253 *
4254 * CAUTION: o, x and X conversions on regular ints can never
4255 * produce a '-' sign, but can for Python's unbounded ints.
4256 */
4257PyObject*
4258_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4259 char **pbuf, int *plen)
4260{
4261 PyObject *result = NULL;
4262 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004263 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004264 int sign; /* 1 if '-', else 0 */
4265 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004266 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004267 int numdigits; /* len == numnondigits + numdigits */
4268 int numnondigits = 0;
4269
4270 switch (type) {
4271 case 'd':
4272 case 'u':
4273 result = val->ob_type->tp_str(val);
4274 break;
4275 case 'o':
4276 result = val->ob_type->tp_as_number->nb_oct(val);
4277 break;
4278 case 'x':
4279 case 'X':
4280 numnondigits = 2;
4281 result = val->ob_type->tp_as_number->nb_hex(val);
4282 break;
4283 default:
4284 assert(!"'type' not in [duoxX]");
4285 }
4286 if (!result)
4287 return NULL;
4288
4289 /* To modify the string in-place, there can only be one reference. */
4290 if (result->ob_refcnt != 1) {
4291 PyErr_BadInternalCall();
4292 return NULL;
4293 }
4294 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004295 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004296 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004297 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4298 return NULL;
4299 }
4300 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004301 if (buf[len-1] == 'L') {
4302 --len;
4303 buf[len] = '\0';
4304 }
4305 sign = buf[0] == '-';
4306 numnondigits += sign;
4307 numdigits = len - numnondigits;
4308 assert(numdigits > 0);
4309
Tim Petersfff53252001-04-12 18:38:48 +00004310 /* Get rid of base marker unless F_ALT */
4311 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004312 /* Need to skip 0x, 0X or 0. */
4313 int skipped = 0;
4314 switch (type) {
4315 case 'o':
4316 assert(buf[sign] == '0');
4317 /* If 0 is only digit, leave it alone. */
4318 if (numdigits > 1) {
4319 skipped = 1;
4320 --numdigits;
4321 }
4322 break;
4323 case 'x':
4324 case 'X':
4325 assert(buf[sign] == '0');
4326 assert(buf[sign + 1] == 'x');
4327 skipped = 2;
4328 numnondigits -= 2;
4329 break;
4330 }
4331 if (skipped) {
4332 buf += skipped;
4333 len -= skipped;
4334 if (sign)
4335 buf[0] = '-';
4336 }
4337 assert(len == numnondigits + numdigits);
4338 assert(numdigits > 0);
4339 }
4340
4341 /* Fill with leading zeroes to meet minimum width. */
4342 if (prec > numdigits) {
4343 PyObject *r1 = PyString_FromStringAndSize(NULL,
4344 numnondigits + prec);
4345 char *b1;
4346 if (!r1) {
4347 Py_DECREF(result);
4348 return NULL;
4349 }
4350 b1 = PyString_AS_STRING(r1);
4351 for (i = 0; i < numnondigits; ++i)
4352 *b1++ = *buf++;
4353 for (i = 0; i < prec - numdigits; i++)
4354 *b1++ = '0';
4355 for (i = 0; i < numdigits; i++)
4356 *b1++ = *buf++;
4357 *b1 = '\0';
4358 Py_DECREF(result);
4359 result = r1;
4360 buf = PyString_AS_STRING(result);
4361 len = numnondigits + prec;
4362 }
4363
4364 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004365 if (type == 'X') {
4366 /* Need to convert all lower case letters to upper case.
4367 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004368 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004369 if (buf[i] >= 'a' && buf[i] <= 'x')
4370 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004371 }
4372 *pbuf = buf;
4373 *plen = len;
4374 return result;
4375}
4376
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004377static int
Fred Drakeba096332000-07-09 07:04:36 +00004378formatint(char *buf, size_t buflen, int flags,
4379 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004380{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004381 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004382 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4383 + 1 + 1 = 24 */
4384 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004385 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004386 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004387
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004388 x = PyInt_AsLong(v);
4389 if (x == -1 && PyErr_Occurred()) {
4390 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004391 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004392 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004393 if (x < 0 && type == 'u') {
4394 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004395 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004396 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4397 sign = "-";
4398 else
4399 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004400 if (prec < 0)
4401 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004402
4403 if ((flags & F_ALT) &&
4404 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004405 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004406 * of issues that cause pain:
4407 * - when 0 is being converted, the C standard leaves off
4408 * the '0x' or '0X', which is inconsistent with other
4409 * %#x/%#X conversions and inconsistent with Python's
4410 * hex() function
4411 * - there are platforms that violate the standard and
4412 * convert 0 with the '0x' or '0X'
4413 * (Metrowerks, Compaq Tru64)
4414 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004415 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004416 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004417 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004418 * We can achieve the desired consistency by inserting our
4419 * own '0x' or '0X' prefix, and substituting %x/%X in place
4420 * of %#x/%#X.
4421 *
4422 * Note that this is the same approach as used in
4423 * formatint() in unicodeobject.c
4424 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004425 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4426 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004427 }
4428 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004429 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4430 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004431 prec, type);
4432 }
4433
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004434 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4435 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004436 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004437 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004438 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004439 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004440 return -1;
4441 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004442 if (sign[0])
4443 PyOS_snprintf(buf, buflen, fmt, -x);
4444 else
4445 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004446 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004447}
4448
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004449static int
Fred Drakeba096332000-07-09 07:04:36 +00004450formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004451{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004452 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004453 if (PyString_Check(v)) {
4454 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004455 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004456 }
4457 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004458 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004459 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004460 }
4461 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004462 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004463}
4464
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004474
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004475PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004476PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004477{
4478 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004479 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004480 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004481 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004482 PyObject *result, *orig_args;
4483#ifdef Py_USING_UNICODE
4484 PyObject *v, *w;
4485#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004486 PyObject *dict = NULL;
4487 if (format == NULL || !PyString_Check(format) || args == NULL) {
4488 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004489 return NULL;
4490 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004491 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004492 fmt = PyString_AS_STRING(format);
4493 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004494 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004495 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004496 if (result == NULL)
4497 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004498 res = PyString_AsString(result);
4499 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004500 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004501 argidx = 0;
4502 }
4503 else {
4504 arglen = -1;
4505 argidx = -2;
4506 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004507 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4508 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004509 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004510 while (--fmtcnt >= 0) {
4511 if (*fmt != '%') {
4512 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004513 rescnt = fmtcnt + 100;
4514 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004515 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004516 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004517 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004518 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004519 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004520 }
4521 *res++ = *fmt++;
4522 }
4523 else {
4524 /* Got a format specifier */
4525 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004526 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004527 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004528 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004529 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004530 PyObject *v = NULL;
4531 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004532 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004533 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004534 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004535 char formatbuf[FORMATBUFLEN];
4536 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004537#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004538 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004539 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004540#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004541
Guido van Rossumda9c2711996-12-05 21:58:58 +00004542 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004543 if (*fmt == '(') {
4544 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004545 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004546 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004547 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004548
4549 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004550 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004551 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004552 goto error;
4553 }
4554 ++fmt;
4555 --fmtcnt;
4556 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004557 /* Skip over balanced parentheses */
4558 while (pcount > 0 && --fmtcnt >= 0) {
4559 if (*fmt == ')')
4560 --pcount;
4561 else if (*fmt == '(')
4562 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004563 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004564 }
4565 keylen = fmt - keystart - 1;
4566 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004567 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004568 "incomplete format key");
4569 goto error;
4570 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004571 key = PyString_FromStringAndSize(keystart,
4572 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004573 if (key == NULL)
4574 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004575 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004576 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004577 args_owned = 0;
4578 }
4579 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004580 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004581 if (args == NULL) {
4582 goto error;
4583 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004584 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004585 arglen = -1;
4586 argidx = -2;
4587 }
Guido van Rossume5372401993-03-16 12:15:04 +00004588 while (--fmtcnt >= 0) {
4589 switch (c = *fmt++) {
4590 case '-': flags |= F_LJUST; continue;
4591 case '+': flags |= F_SIGN; continue;
4592 case ' ': flags |= F_BLANK; continue;
4593 case '#': flags |= F_ALT; continue;
4594 case '0': flags |= F_ZERO; continue;
4595 }
4596 break;
4597 }
4598 if (c == '*') {
4599 v = getnextarg(args, arglen, &argidx);
4600 if (v == NULL)
4601 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004602 if (!PyInt_Check(v)) {
4603 PyErr_SetString(PyExc_TypeError,
4604 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004605 goto error;
4606 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004607 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004608 if (width < 0) {
4609 flags |= F_LJUST;
4610 width = -width;
4611 }
Guido van Rossume5372401993-03-16 12:15:04 +00004612 if (--fmtcnt >= 0)
4613 c = *fmt++;
4614 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004615 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004616 width = c - '0';
4617 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004618 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004619 if (!isdigit(c))
4620 break;
4621 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004622 PyErr_SetString(
4623 PyExc_ValueError,
4624 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004625 goto error;
4626 }
4627 width = width*10 + (c - '0');
4628 }
4629 }
4630 if (c == '.') {
4631 prec = 0;
4632 if (--fmtcnt >= 0)
4633 c = *fmt++;
4634 if (c == '*') {
4635 v = getnextarg(args, arglen, &argidx);
4636 if (v == NULL)
4637 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004638 if (!PyInt_Check(v)) {
4639 PyErr_SetString(
4640 PyExc_TypeError,
4641 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004642 goto error;
4643 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004644 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004645 if (prec < 0)
4646 prec = 0;
4647 if (--fmtcnt >= 0)
4648 c = *fmt++;
4649 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004650 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004651 prec = c - '0';
4652 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004653 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004654 if (!isdigit(c))
4655 break;
4656 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004657 PyErr_SetString(
4658 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004659 "prec too big");
4660 goto error;
4661 }
4662 prec = prec*10 + (c - '0');
4663 }
4664 }
4665 } /* prec */
4666 if (fmtcnt >= 0) {
4667 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004668 if (--fmtcnt >= 0)
4669 c = *fmt++;
4670 }
4671 }
4672 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004673 PyErr_SetString(PyExc_ValueError,
4674 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004675 goto error;
4676 }
4677 if (c != '%') {
4678 v = getnextarg(args, arglen, &argidx);
4679 if (v == NULL)
4680 goto error;
4681 }
4682 sign = 0;
4683 fill = ' ';
4684 switch (c) {
4685 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004686 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004687 len = 1;
4688 break;
4689 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004690#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004691 if (PyUnicode_Check(v)) {
4692 fmt = fmt_start;
4693 argidx = argidx_start;
4694 goto unicode;
4695 }
Georg Brandld45014b2005-10-01 17:06:00 +00004696#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004697 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004698#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004699 if (temp != NULL && PyUnicode_Check(temp)) {
4700 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004701 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004702 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004703 goto unicode;
4704 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004705#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004706 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004707 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004708 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004709 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004710 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004711 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004712 if (!PyString_Check(temp)) {
4713 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004714 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004715 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004716 goto error;
4717 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004718 pbuf = PyString_AS_STRING(temp);
4719 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004720 if (prec >= 0 && len > prec)
4721 len = prec;
4722 break;
4723 case 'i':
4724 case 'd':
4725 case 'u':
4726 case 'o':
4727 case 'x':
4728 case 'X':
4729 if (c == 'i')
4730 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004731 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004732 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004733 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004734 prec, c, &pbuf, &ilen);
4735 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004736 if (!temp)
4737 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004738 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004739 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004740 else {
4741 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004742 len = formatint(pbuf,
4743 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004744 flags, prec, c, v);
4745 if (len < 0)
4746 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004747 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004748 }
4749 if (flags & F_ZERO)
4750 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004751 break;
4752 case 'e':
4753 case 'E':
4754 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004755 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004756 case 'g':
4757 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004758 if (c == 'F')
4759 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004760 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004761 len = formatfloat(pbuf, sizeof(formatbuf),
4762 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004763 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004764 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004765 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004766 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004767 fill = '0';
4768 break;
4769 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004770#ifdef Py_USING_UNICODE
4771 if (PyUnicode_Check(v)) {
4772 fmt = fmt_start;
4773 argidx = argidx_start;
4774 goto unicode;
4775 }
4776#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004777 pbuf = formatbuf;
4778 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004779 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004780 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004781 break;
4782 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004783 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004784 "unsupported format character '%c' (0x%x) "
4785 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004786 c, c,
4787 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004788 goto error;
4789 }
4790 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004791 if (*pbuf == '-' || *pbuf == '+') {
4792 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004793 len--;
4794 }
4795 else if (flags & F_SIGN)
4796 sign = '+';
4797 else if (flags & F_BLANK)
4798 sign = ' ';
4799 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004800 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004801 }
4802 if (width < len)
4803 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004804 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004805 reslen -= rescnt;
4806 rescnt = width + fmtcnt + 100;
4807 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004808 if (reslen < 0) {
4809 Py_DECREF(result);
4810 return PyErr_NoMemory();
4811 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004812 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004813 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004814 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004815 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004816 }
4817 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004818 if (fill != ' ')
4819 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004820 rescnt--;
4821 if (width > len)
4822 width--;
4823 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004824 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4825 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004826 assert(pbuf[1] == c);
4827 if (fill != ' ') {
4828 *res++ = *pbuf++;
4829 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004830 }
Tim Petersfff53252001-04-12 18:38:48 +00004831 rescnt -= 2;
4832 width -= 2;
4833 if (width < 0)
4834 width = 0;
4835 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004836 }
4837 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004838 do {
4839 --rescnt;
4840 *res++ = fill;
4841 } while (--width > len);
4842 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004843 if (fill == ' ') {
4844 if (sign)
4845 *res++ = sign;
4846 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004847 (c == 'x' || c == 'X')) {
4848 assert(pbuf[0] == '0');
4849 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004850 *res++ = *pbuf++;
4851 *res++ = *pbuf++;
4852 }
4853 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004854 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004855 res += len;
4856 rescnt -= len;
4857 while (--width >= len) {
4858 --rescnt;
4859 *res++ = ' ';
4860 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004861 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004862 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004863 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004864 goto error;
4865 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004866 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004867 } /* '%' */
4868 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004869 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004870 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004871 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004872 goto error;
4873 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004874 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004875 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004876 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004877 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004878 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004879
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004880#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004881 unicode:
4882 if (args_owned) {
4883 Py_DECREF(args);
4884 args_owned = 0;
4885 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004886 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004887 if (PyTuple_Check(orig_args) && argidx > 0) {
4888 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004889 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004890 v = PyTuple_New(n);
4891 if (v == NULL)
4892 goto error;
4893 while (--n >= 0) {
4894 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4895 Py_INCREF(w);
4896 PyTuple_SET_ITEM(v, n, w);
4897 }
4898 args = v;
4899 } else {
4900 Py_INCREF(orig_args);
4901 args = orig_args;
4902 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004903 args_owned = 1;
4904 /* Take what we have of the result and let the Unicode formatting
4905 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004906 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004907 if (_PyString_Resize(&result, rescnt))
4908 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004909 fmtcnt = PyString_GET_SIZE(format) - \
4910 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004911 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4912 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004913 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004914 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004915 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004916 if (v == NULL)
4917 goto error;
4918 /* Paste what we have (result) to what the Unicode formatting
4919 function returned (v) and return the result (or error) */
4920 w = PyUnicode_Concat(result, v);
4921 Py_DECREF(result);
4922 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004923 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004924 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004925#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004926
Guido van Rossume5372401993-03-16 12:15:04 +00004927 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004928 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004929 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004930 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004931 }
Guido van Rossume5372401993-03-16 12:15:04 +00004932 return NULL;
4933}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004934
Guido van Rossum2a61e741997-01-18 07:55:05 +00004935void
Fred Drakeba096332000-07-09 07:04:36 +00004936PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004937{
4938 register PyStringObject *s = (PyStringObject *)(*p);
4939 PyObject *t;
4940 if (s == NULL || !PyString_Check(s))
4941 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004942 /* If it's a string subclass, we don't really know what putting
4943 it in the interned dict might do. */
4944 if (!PyString_CheckExact(s))
4945 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004946 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004948 if (interned == NULL) {
4949 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004950 if (interned == NULL) {
4951 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004952 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004953 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004954 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004955 t = PyDict_GetItem(interned, (PyObject *)s);
4956 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004957 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004958 Py_DECREF(*p);
4959 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004960 return;
4961 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004962
Armin Rigo79f7ad22004-08-07 19:27:39 +00004963 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004964 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004965 return;
4966 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004967 /* The two references in interned are not counted by refcnt.
4968 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004969 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004970 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004971}
4972
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004973void
4974PyString_InternImmortal(PyObject **p)
4975{
4976 PyString_InternInPlace(p);
4977 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4978 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4979 Py_INCREF(*p);
4980 }
4981}
4982
Guido van Rossum2a61e741997-01-18 07:55:05 +00004983
4984PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004985PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004986{
4987 PyObject *s = PyString_FromString(cp);
4988 if (s == NULL)
4989 return NULL;
4990 PyString_InternInPlace(&s);
4991 return s;
4992}
4993
Guido van Rossum8cf04761997-08-02 02:57:45 +00004994void
Fred Drakeba096332000-07-09 07:04:36 +00004995PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004996{
4997 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004998 for (i = 0; i < UCHAR_MAX + 1; i++) {
4999 Py_XDECREF(characters[i]);
5000 characters[i] = NULL;
5001 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005002 Py_XDECREF(nullstring);
5003 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005004}
Barry Warsawa903ad982001-02-23 16:40:48 +00005005
Barry Warsawa903ad982001-02-23 16:40:48 +00005006void _Py_ReleaseInternedStrings(void)
5007{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005008 PyObject *keys;
5009 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005010 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005011
5012 if (interned == NULL || !PyDict_Check(interned))
5013 return;
5014 keys = PyDict_Keys(interned);
5015 if (keys == NULL || !PyList_Check(keys)) {
5016 PyErr_Clear();
5017 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005018 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005019
5020 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5021 detector, interned strings are not forcibly deallocated; rather, we
5022 give them their stolen references back, and then clear and DECREF
5023 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005024
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005025 fprintf(stderr, "releasing interned strings\n");
5026 n = PyList_GET_SIZE(keys);
5027 for (i = 0; i < n; i++) {
5028 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5029 switch (s->ob_sstate) {
5030 case SSTATE_NOT_INTERNED:
5031 /* XXX Shouldn't happen */
5032 break;
5033 case SSTATE_INTERNED_IMMORTAL:
5034 s->ob_refcnt += 1;
5035 break;
5036 case SSTATE_INTERNED_MORTAL:
5037 s->ob_refcnt += 2;
5038 break;
5039 default:
5040 Py_FatalError("Inconsistent interned string state.");
5041 }
5042 s->ob_sstate = SSTATE_NOT_INTERNED;
5043 }
5044 Py_DECREF(keys);
5045 PyDict_Clear(interned);
5046 Py_DECREF(interned);
5047 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005048}