blob: 33cbf07406675dfd29156609e44caaa6b834b101 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Fredrik Lundhaf722372006-05-25 17:55:31 +00008#undef USE_INLINE /* XXX - set via configure? */
9
10#if defined(_MSC_VER) /* this is taken from _sre.c */
11#pragma warning(disable: 4710)
12/* fastest possible local call under MSVC */
13#define LOCAL(type) static __inline type __fastcall
14#elif defined(USE_INLINE)
15#define LOCAL(type) static inline type
16#else
17#define LOCAL(type) static type
18#endif
19
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020#ifdef COUNT_ALLOCS
21int null_strings, one_strings;
22#endif
23
Guido van Rossumc0b618a1997-05-02 03:12:38 +000024static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000025static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026
Guido van Rossum45ec02a2002-08-19 21:43:18 +000027/* This dictionary holds all interned strings. Note that references to
28 strings in this dictionary are *not* counted in the string's ob_refcnt.
29 When the interned string reaches a refcnt of 0 the string deallocation
30 function will delete the reference from this dictionary.
31
   Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000033 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
34*/
35static PyObject *interned;
36
37
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000038/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000039 For both PyString_FromString() and PyString_FromStringAndSize(), the
40 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000041 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000042
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000043 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000044 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000045
   For PyString_FromStringAndSize(), the parameter `str' is
47 either NULL or else points to a string containing at least `size' bytes.
48 For PyString_FromStringAndSize(), the string in the `str' parameter does
49 not have to be null-terminated. (Therefore it is safe to construct a
50 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
51 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
52 bytes (setting the last byte to the null terminating character) and you can
53 fill in the data yourself. If `str' is non-NULL then the resulting
54 PyString object must be treated as immutable and you must not fill in nor
55 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000056
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000057 The PyObject member `op->ob_size', which denotes the number of "extra
58 items" in a variable-size object, will contain the number of bytes
59 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
61 PyString_FromStringAndSize()) or the length of the string in the `str'
62 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000065PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000066{
Tim Peters9e897f42001-05-09 07:37:07 +000067 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000068 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 if (size == 0 && (op = nullstring) != NULL) {
70#ifdef COUNT_ALLOCS
71 null_strings++;
72#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 Py_INCREF(op);
74 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
78 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef COUNT_ALLOCS
80 one_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000085
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000086 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000087 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000088 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000090 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000092 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (str != NULL)
94 memcpy(op->ob_sval, str, size);
95 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000096 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000098 PyObject *t = (PyObject *)op;
99 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000100 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000104 PyObject *t = (PyObject *)op;
105 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000106 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111}
112
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000114PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115{
Tim Peters62de65b2001-12-06 20:29:32 +0000116 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000117 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000118
119 assert(str != NULL);
120 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000121 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000122 PyErr_SetString(PyExc_OverflowError,
123 "string is too long for a Python string");
124 return NULL;
125 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 if (size == 0 && (op = nullstring) != NULL) {
127#ifdef COUNT_ALLOCS
128 null_strings++;
129#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 Py_INCREF(op);
131 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 }
133 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
134#ifdef COUNT_ALLOCS
135 one_strings++;
136#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000137 Py_INCREF(op);
138 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000140
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000141 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000142 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000143 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000147 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000148 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000149 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000151 PyObject *t = (PyObject *)op;
152 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000153 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000157 PyObject *t = (PyObject *)op;
158 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000159 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000161 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000162 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000164}
165
Barry Warsawdadace02001-08-24 18:32:06 +0000166PyObject *
167PyString_FromFormatV(const char *format, va_list vargs)
168{
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000170 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000171 const char* f;
172 char *s;
173 PyObject* string;
174
Tim Petersc15c4f12001-10-02 21:32:07 +0000175#ifdef VA_LIST_IS_ARRAY
176 memcpy(count, vargs, sizeof(va_list));
177#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000178#ifdef __va_copy
179 __va_copy(count, vargs);
180#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000181 count = vargs;
182#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000183#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
188 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
189 ;
190
Tim Peters8931ff12006-05-13 23:28:20 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000196 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000197
Barry Warsawdadace02001-08-24 18:32:06 +0000198 switch (*f) {
199 case 'c':
200 (void)va_arg(count, int);
201 /* fall through... */
202 case '%':
203 n++;
204 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000205 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000206 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 /* 20 bytes is enough to hold a 64-bit
208 integer. Decimal takes the most space.
209 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000210 n += 20;
211 break;
212 case 's':
213 s = va_arg(count, char*);
214 n += strlen(s);
215 break;
216 case 'p':
217 (void) va_arg(count, int);
218 /* maximum 64-bit pointer representation:
219 * 0xffffffffffffffff
220 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000222 */
223 n += 19;
224 break;
225 default:
226 /* if we stumble upon an unknown
227 formatting code, copy the rest of
228 the format string to the output
229 string. (we cannot just skip the
230 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 n += strlen(p);
233 goto expand;
234 }
235 } else
236 n++;
237 }
238 expand:
239 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000240 /* Since we've analyzed how much space we need for the worst case,
241 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000242 string = PyString_FromStringAndSize(NULL, n);
243 if (!string)
244 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000245
Barry Warsawdadace02001-08-24 18:32:06 +0000246 s = PyString_AsString(string);
247
248 for (f = format; *f; f++) {
249 if (*f == '%') {
250 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000251 Py_ssize_t i;
252 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000253 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000254 /* parse the width.precision part (we're only
255 interested in the precision value, if any) */
256 n = 0;
257 while (isdigit(Py_CHARMASK(*f)))
258 n = (n*10) + *f++ - '0';
259 if (*f == '.') {
260 f++;
261 n = 0;
262 while (isdigit(Py_CHARMASK(*f)))
263 n = (n*10) + *f++ - '0';
264 }
265 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
266 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000267 /* handle the long flag, but only for %ld and %lu.
268 others can be added when necessary. */
269 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000270 longflag = 1;
271 ++f;
272 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000273 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000274 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000275 size_tflag = 1;
276 ++f;
277 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000278
Barry Warsawdadace02001-08-24 18:32:06 +0000279 switch (*f) {
280 case 'c':
281 *s++ = va_arg(vargs, int);
282 break;
283 case 'd':
284 if (longflag)
285 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000286 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
288 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
290 sprintf(s, "%d", va_arg(vargs, int));
291 s += strlen(s);
292 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000293 case 'u':
294 if (longflag)
295 sprintf(s, "%lu",
296 va_arg(vargs, unsigned long));
297 else if (size_tflag)
298 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
299 va_arg(vargs, size_t));
300 else
301 sprintf(s, "%u",
302 va_arg(vargs, unsigned int));
303 s += strlen(s);
304 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000305 case 'i':
306 sprintf(s, "%i", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 'x':
310 sprintf(s, "%x", va_arg(vargs, int));
311 s += strlen(s);
312 break;
313 case 's':
314 p = va_arg(vargs, char*);
315 i = strlen(p);
316 if (n > 0 && i > n)
317 i = n;
318 memcpy(s, p, i);
319 s += i;
320 break;
321 case 'p':
322 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000323 /* %p is ill-defined: ensure leading 0x. */
324 if (s[1] == 'X')
325 s[1] = 'x';
326 else if (s[1] != 'x') {
327 memmove(s+2, s, strlen(s)+1);
328 s[0] = '0';
329 s[1] = 'x';
330 }
Barry Warsawdadace02001-08-24 18:32:06 +0000331 s += strlen(s);
332 break;
333 case '%':
334 *s++ = '%';
335 break;
336 default:
337 strcpy(s, p);
338 s += strlen(s);
339 goto end;
340 }
341 } else
342 *s++ = *f;
343 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000344
Barry Warsawdadace02001-08-24 18:32:06 +0000345 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000347 return string;
348}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000349
Barry Warsawdadace02001-08-24 18:32:06 +0000350PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000351PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000352{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000353 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000354 va_list vargs;
355
356#ifdef HAVE_STDARG_PROTOTYPES
357 va_start(vargs, format);
358#else
359 va_start(vargs);
360#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000361 ret = PyString_FromFormatV(format, vargs);
362 va_end(vargs);
363 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000364}
365
366
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000368 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 const char *encoding,
370 const char *errors)
371{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372 PyObject *v, *str;
373
374 str = PyString_FromStringAndSize(s, size);
375 if (str == NULL)
376 return NULL;
377 v = PyString_AsDecodedString(str, encoding, errors);
378 Py_DECREF(str);
379 return v;
380}
381
382PyObject *PyString_AsDecodedObject(PyObject *str,
383 const char *encoding,
384 const char *errors)
385{
386 PyObject *v;
387
388 if (!PyString_Check(str)) {
389 PyErr_BadArgument();
390 goto onError;
391 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393 if (encoding == NULL) {
394#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000395 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#else
397 PyErr_SetString(PyExc_ValueError, "no encoding specified");
398 goto onError;
399#endif
400 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000401
402 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 v = PyCodec_Decode(str, encoding, errors);
404 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000405 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000406
407 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000410 return NULL;
411}
412
413PyObject *PyString_AsDecodedString(PyObject *str,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v;
418
419 v = PyString_AsDecodedObject(str, encoding, errors);
420 if (v == NULL)
421 goto onError;
422
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000423#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000424 /* Convert Unicode to a string using the default encoding */
425 if (PyUnicode_Check(v)) {
426 PyObject *temp = v;
427 v = PyUnicode_AsEncodedString(v, NULL, NULL);
428 Py_DECREF(temp);
429 if (v == NULL)
430 goto onError;
431 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000433 if (!PyString_Check(v)) {
434 PyErr_Format(PyExc_TypeError,
435 "decoder did not return a string object (type=%.400s)",
436 v->ob_type->tp_name);
437 Py_DECREF(v);
438 goto onError;
439 }
440
441 return v;
442
443 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 return NULL;
445}
446
447PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000448 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000453
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000454 str = PyString_FromStringAndSize(s, size);
455 if (str == NULL)
456 return NULL;
457 v = PyString_AsEncodedString(str, encoding, errors);
458 Py_DECREF(str);
459 return v;
460}
461
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 const char *encoding,
464 const char *errors)
465{
466 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000467
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(str)) {
469 PyErr_BadArgument();
470 goto onError;
471 }
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473 if (encoding == NULL) {
474#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#else
477 PyErr_SetString(PyExc_ValueError, "no encoding specified");
478 goto onError;
479#endif
480 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000481
482 /* Encode via the codec registry */
483 v = PyCodec_Encode(str, encoding, errors);
484 if (v == NULL)
485 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
487 return v;
488
489 onError:
490 return NULL;
491}
492
493PyObject *PyString_AsEncodedString(PyObject *str,
494 const char *encoding,
495 const char *errors)
496{
497 PyObject *v;
498
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000499 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000500 if (v == NULL)
501 goto onError;
502
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000503#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000504 /* Convert Unicode to a string using the default encoding */
505 if (PyUnicode_Check(v)) {
506 PyObject *temp = v;
507 v = PyUnicode_AsEncodedString(v, NULL, NULL);
508 Py_DECREF(temp);
509 if (v == NULL)
510 goto onError;
511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000512#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 if (!PyString_Check(v)) {
514 PyErr_Format(PyExc_TypeError,
515 "encoder did not return a string object (type=%.400s)",
516 v->ob_type->tp_name);
517 Py_DECREF(v);
518 goto onError;
519 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000520
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000521 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000522
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000523 onError:
524 return NULL;
525}
526
Guido van Rossum234f9421993-06-17 12:35:49 +0000527static void
Fred Drakeba096332000-07-09 07:04:36 +0000528string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000529{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000530 switch (PyString_CHECK_INTERNED(op)) {
531 case SSTATE_NOT_INTERNED:
532 break;
533
534 case SSTATE_INTERNED_MORTAL:
535 /* revive dead object temporarily for DelItem */
536 op->ob_refcnt = 3;
537 if (PyDict_DelItem(interned, op) != 0)
538 Py_FatalError(
539 "deletion of interned string failed");
540 break;
541
542 case SSTATE_INTERNED_IMMORTAL:
543 Py_FatalError("Immortal interned string died.");
544
545 default:
546 Py_FatalError("Inconsistent interned string state.");
547 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000548 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000549}
550
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000551/* Unescape a backslash-escaped string. If unicode is non-zero,
552 the string is a u-literal. If recode_encoding is non-zero,
553 the string is UTF-8 encoded and should be re-encoded in the
554 specified encoding. */
555
556PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000557 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000559 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000560 const char *recode_encoding)
561{
562 int c;
563 char *p, *buf;
564 const char *end;
565 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000566 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000567 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568 if (v == NULL)
569 return NULL;
570 p = buf = PyString_AsString(v);
571 end = s + len;
572 while (s < end) {
573 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000574 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575#ifdef Py_USING_UNICODE
576 if (recode_encoding && (*s & 0x80)) {
577 PyObject *u, *w;
578 char *r;
579 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000580 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 t = s;
582 /* Decode non-ASCII bytes as UTF-8. */
583 while (t < end && (*t & 0x80)) t++;
584 u = PyUnicode_DecodeUTF8(s, t - s, errors);
585 if(!u) goto failed;
586
587 /* Recode them in target encoding. */
588 w = PyUnicode_AsEncodedString(
589 u, recode_encoding, errors);
590 Py_DECREF(u);
591 if (!w) goto failed;
592
593 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000594 assert(PyString_Check(w));
595 r = PyString_AS_STRING(w);
596 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000597 memcpy(p, r, rn);
598 p += rn;
599 Py_DECREF(w);
600 s = t;
601 } else {
602 *p++ = *s++;
603 }
604#else
605 *p++ = *s++;
606#endif
607 continue;
608 }
609 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000610 if (s==end) {
611 PyErr_SetString(PyExc_ValueError,
612 "Trailing \\ in string");
613 goto failed;
614 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000615 switch (*s++) {
616 /* XXX This assumes ASCII! */
617 case '\n': break;
618 case '\\': *p++ = '\\'; break;
619 case '\'': *p++ = '\''; break;
620 case '\"': *p++ = '\"'; break;
621 case 'b': *p++ = '\b'; break;
622 case 'f': *p++ = '\014'; break; /* FF */
623 case 't': *p++ = '\t'; break;
624 case 'n': *p++ = '\n'; break;
625 case 'r': *p++ = '\r'; break;
626 case 'v': *p++ = '\013'; break; /* VT */
627 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
628 case '0': case '1': case '2': case '3':
629 case '4': case '5': case '6': case '7':
630 c = s[-1] - '0';
631 if ('0' <= *s && *s <= '7') {
632 c = (c<<3) + *s++ - '0';
633 if ('0' <= *s && *s <= '7')
634 c = (c<<3) + *s++ - '0';
635 }
636 *p++ = c;
637 break;
638 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000639 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 && isxdigit(Py_CHARMASK(s[1]))) {
641 unsigned int x = 0;
642 c = Py_CHARMASK(*s);
643 s++;
644 if (isdigit(c))
645 x = c - '0';
646 else if (islower(c))
647 x = 10 + c - 'a';
648 else
649 x = 10 + c - 'A';
650 x = x << 4;
651 c = Py_CHARMASK(*s);
652 s++;
653 if (isdigit(c))
654 x += c - '0';
655 else if (islower(c))
656 x += 10 + c - 'a';
657 else
658 x += 10 + c - 'A';
659 *p++ = x;
660 break;
661 }
662 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000663 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000664 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667 if (strcmp(errors, "replace") == 0) {
668 *p++ = '?';
669 } else if (strcmp(errors, "ignore") == 0)
670 /* do nothing */;
671 else {
672 PyErr_Format(PyExc_ValueError,
673 "decoding error; "
674 "unknown error handling code: %.400s",
675 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#ifndef Py_USING_UNICODE
679 case 'u':
680 case 'U':
681 case 'N':
682 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000683 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 "Unicode escapes not legal "
685 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000686 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000687 }
688#endif
689 default:
690 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000691 s--;
692 goto non_esc; /* an arbitry number of unescaped
693 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000694 }
695 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000696 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000698 return v;
699 failed:
700 Py_DECREF(v);
701 return NULL;
702}
703
Martin v. Löwis18e16552006-02-15 17:27:45 +0000704static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705string_getsize(register PyObject *op)
706{
707 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000708 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709 if (PyString_AsStringAndSize(op, &s, &len))
710 return -1;
711 return len;
712}
713
714static /*const*/ char *
715string_getbuffer(register PyObject *op)
716{
717 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000718 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (PyString_AsStringAndSize(op, &s, &len))
720 return NULL;
721 return s;
722}
723
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000725PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 if (!PyString_Check(op))
728 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730}
731
732/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000733PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000734{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735 if (!PyString_Check(op))
736 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738}
739
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740int
741PyString_AsStringAndSize(register PyObject *obj,
742 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000743 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000744{
745 if (s == NULL) {
746 PyErr_BadInternalCall();
747 return -1;
748 }
749
750 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000752 if (PyUnicode_Check(obj)) {
753 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
754 if (obj == NULL)
755 return -1;
756 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000757 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000758#endif
759 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000760 PyErr_Format(PyExc_TypeError,
761 "expected string or Unicode object, "
762 "%.200s found", obj->ob_type->tp_name);
763 return -1;
764 }
765 }
766
767 *s = PyString_AS_STRING(obj);
768 if (len != NULL)
769 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000770 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000771 PyErr_SetString(PyExc_TypeError,
772 "expected string without null bytes");
773 return -1;
774 }
775 return 0;
776}
777
Fredrik Lundhaf722372006-05-25 17:55:31 +0000778/* -------------------------------------------------------------------- */
779/* Helpers */
780
781#define USE_FAST /* experimental fast search implementation */
782
/* XXX - this code is copied from unicodeobject.c. we really should
   refactor the core implementations (see _sre.c for how this can be
   done), but that'll have to wait -- fredrik */

/* fast search/count implementation, based on a mix between boyer-
   moore and horspool, with a few more bells and whistles on the top.
   for some more background, see: http://effbot.org/stringlib */

/* note: fastsearch may access s[n], which isn't a problem when using
   Python's ordinary string types, but may cause problems if you're
   using this code in other contexts. also, the count mode returns -1
   if there cannot possibly be a match in the target string, and 0 if
   it has actually checked for matches, but didn't find any. callers
   beware! */
797
798#define FAST_COUNT 0
799#define FAST_SEARCH 1
800
801LOCAL(Py_ssize_t)
802 fastsearch(const unsigned char* s, Py_ssize_t n, const unsigned char* p,
803 Py_ssize_t m, int mode)
804{
805 long mask;
806 int skip, count = 0;
807 Py_ssize_t i, j, mlast, w;
808
809 w = n - m;
810
811 if (w < 0)
812 return -1;
813
814 /* look for special cases */
815 if (m <= 1) {
816 if (m <= 0)
817 return -1;
818 /* use special case for 1-character strings */
819 if (mode == FAST_COUNT) {
820 for (i = 0; i < n; i++)
821 if (s[i] == p[0])
822 count++;
823 return count;
824 } else {
825 for (i = 0; i < n; i++)
826 if (s[i] == p[0])
827 return i;
828 }
829 return -1;
830 }
831
832 mlast = m - 1;
833
834 /* create compressed boyer-moore delta 1 table */
835 skip = mlast - 1;
836 /* process pattern[:-1] */
837 for (mask = i = 0; i < mlast; i++) {
838 mask |= (1 << (p[i] & 0x1F));
839 if (p[i] == p[mlast])
840 skip = mlast - i - 1;
841 }
842 /* process pattern[-1] outside the loop */
843 mask |= (1 << (p[mlast] & 0x1F));
844
845 for (i = 0; i <= w; i++) {
846 /* note: using mlast in the skip path slows things down on x86 */
847 if (s[i+m-1] == p[m-1]) {
848 /* candidate match */
849 for (j = 0; j < mlast; j++)
850 if (s[i+j] != p[j])
851 break;
852 if (j == mlast) {
853 /* got a match! */
854 if (mode != FAST_COUNT)
855 return i;
856 count++;
857 i = i + mlast;
858 continue;
859 }
860 /* miss: check if next character is part of pattern */
861 if (!(mask & (1 << (s[i+m] & 0x1F))))
862 i = i + m;
863 else {
864 i = i + skip;
865 continue;
866 }
867 } else {
868 /* skip: check if next character is part of pattern */
869 if (!(mask & (1 << (s[i+m] & 0x1F))))
870 i = i + m;
871 }
872 }
873
874 if (mode != FAST_COUNT)
875 return -1;
876 return count;
877}
878
879/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880/* Methods */
881
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000882static int
Fred Drakeba096332000-07-09 07:04:36 +0000883string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000885 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000888
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000889 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000890 if (! PyString_CheckExact(op)) {
891 int ret;
892 /* A str subclass may have its own __str__ method. */
893 op = (PyStringObject *) PyObject_Str((PyObject *)op);
894 if (op == NULL)
895 return -1;
896 ret = string_print(op, fp, flags);
897 Py_DECREF(op);
898 return ret;
899 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000901#ifdef __VMS
902 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
903#else
904 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
905#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000906 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000908
Thomas Wouters7e474022000-07-16 12:04:32 +0000909 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000910 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000911 if (memchr(op->ob_sval, '\'', op->ob_size) &&
912 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000913 quote = '"';
914
915 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 for (i = 0; i < op->ob_size; i++) {
917 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000918 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000919 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000920 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000921 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000922 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000923 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000924 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000925 fprintf(fp, "\\r");
926 else if (c < ' ' || c >= 0x7f)
927 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000928 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000929 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000931 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000932 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933}
934
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000935PyObject *
936PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000938 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000939 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000940 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000941 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000942 PyErr_SetString(PyExc_OverflowError,
943 "string is too large to make repr");
944 }
945 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000947 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 }
949 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000950 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951 register char c;
952 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000953 int quote;
954
Thomas Wouters7e474022000-07-16 12:04:32 +0000955 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000956 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000957 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000958 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000959 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000960 quote = '"';
961
Tim Peters9161c8b2001-12-03 01:55:38 +0000962 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000963 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000965 /* There's at least enough room for a hex escape
966 and a closing quote. */
967 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000969 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000970 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000971 else if (c == '\t')
972 *p++ = '\\', *p++ = 't';
973 else if (c == '\n')
974 *p++ = '\\', *p++ = 'n';
975 else if (c == '\r')
976 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000977 else if (c < ' ' || c >= 0x7f) {
978 /* For performance, we don't want to call
979 PyOS_snprintf here (extra layers of
980 function call). */
981 sprintf(p, "\\x%02x", c & 0xff);
982 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000983 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000984 else
985 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000987 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000988 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000990 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000991 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000992 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994}
995
Guido van Rossum189f1df2001-05-01 16:51:53 +0000996static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000997string_repr(PyObject *op)
998{
999 return PyString_Repr(op, 1);
1000}
1001
1002static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +00001003string_str(PyObject *s)
1004{
Tim Petersc9933152001-10-16 20:18:24 +00001005 assert(PyString_Check(s));
1006 if (PyString_CheckExact(s)) {
1007 Py_INCREF(s);
1008 return s;
1009 }
1010 else {
1011 /* Subtype -- return genuine string with the same value. */
1012 PyStringObject *t = (PyStringObject *) s;
1013 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
1014 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001015}
1016
Martin v. Löwis18e16552006-02-15 17:27:45 +00001017static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001018string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001019{
1020 return a->ob_size;
1021}
1022
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001024string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025{
Andrew Dalke598710c2006-05-25 18:18:39 +00001026 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 register PyStringObject *op;
1028 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001029#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001030 if (PyUnicode_Check(bb))
1031 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001032#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001033 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +00001034 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +00001035 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036 return NULL;
1037 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001039 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +00001040 if ((a->ob_size == 0 || b->ob_size == 0) &&
1041 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1042 if (a->ob_size == 0) {
1043 Py_INCREF(bb);
1044 return bb;
1045 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 Py_INCREF(a);
1047 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 }
1049 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +00001050 if (size < 0) {
1051 PyErr_SetString(PyExc_OverflowError,
1052 "strings are too large to concat");
1053 return NULL;
1054 }
1055
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001056 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001057 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001058 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001059 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001060 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001061 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001062 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001063 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1064 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001065 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001066 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067#undef b
1068}
1069
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001070static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001071string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001072{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073 register Py_ssize_t i;
1074 register Py_ssize_t j;
1075 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001077 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 if (n < 0)
1079 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001080 /* watch out for overflows: the size can overflow int,
1081 * and the # of bytes needed can overflow size_t
1082 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001083 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001084 if (n && size / n != a->ob_size) {
1085 PyErr_SetString(PyExc_OverflowError,
1086 "repeated string is too long");
1087 return NULL;
1088 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001089 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 Py_INCREF(a);
1091 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092 }
Tim Peterse7c05322004-06-27 17:24:49 +00001093 nbytes = (size_t)size;
1094 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001095 PyErr_SetString(PyExc_OverflowError,
1096 "repeated string is too long");
1097 return NULL;
1098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001100 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001101 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001102 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001103 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001104 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001105 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001106 op->ob_sval[size] = '\0';
1107 if (a->ob_size == 1 && n > 0) {
1108 memset(op->ob_sval, a->ob_sval[0] , n);
1109 return (PyObject *) op;
1110 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001111 i = 0;
1112 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001113 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1114 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001115 }
1116 while (i < size) {
1117 j = (i <= size-i) ? i : size-i;
1118 memcpy(op->ob_sval+i, op->ob_sval, j);
1119 i += j;
1120 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001121 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001122}
1123
1124/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1125
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001126static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001127string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001128 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001129 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001130{
1131 if (i < 0)
1132 i = 0;
1133 if (j < 0)
1134 j = 0; /* Avoid signed/unsigned bug in next line */
1135 if (j > a->ob_size)
1136 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001137 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1138 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001139 Py_INCREF(a);
1140 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001141 }
1142 if (j < i)
1143 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001144 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001145}
1146
Guido van Rossum9284a572000-03-07 15:53:43 +00001147static int
Fred Drakeba096332000-07-09 07:04:36 +00001148string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001149{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001150 char *s = PyString_AS_STRING(a);
1151 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001152 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001153#ifdef USE_FAST
1154 Py_ssize_t pos;
1155#else
1156 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001157 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001158 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001159#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001160
1161 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001162#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001163 if (PyUnicode_Check(el))
1164 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001165#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001166 if (!PyString_Check(el)) {
1167 PyErr_SetString(PyExc_TypeError,
1168 "'in <string>' requires string as left operand");
1169 return -1;
1170 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001171 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001172
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001173 if (len_sub == 0)
1174 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001175
1176#ifdef USE_FAST
1177 pos = fastsearch(
1178 s, PyString_GET_SIZE(a),
1179 sub, len_sub, FAST_SEARCH
1180 );
1181 return (pos != -1);
1182#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001183 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001184 substring. When s<last, there is still room for a possible match
1185 and s[0] through s[len_sub-1] will be in bounds.
1186 shortsub is len_sub minus the last character which is checked
1187 separately just before the memcmp(). That check helps prevent
1188 false starts and saves the setup time for memcmp().
1189 */
1190 firstchar = sub[0];
1191 shortsub = len_sub - 1;
1192 lastchar = sub[shortsub];
1193 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1194 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001195 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001196 if (s == NULL)
1197 return 0;
1198 assert(s < last);
1199 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001200 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001201 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001202 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001203#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001204 return 0;
1205}
1206
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001207static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001208string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001209{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001210 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001211 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001212 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001213 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001214 return NULL;
1215 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001216 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001217 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001218 if (v == NULL)
1219 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001220 else {
1221#ifdef COUNT_ALLOCS
1222 one_strings++;
1223#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001224 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001225 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001226 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001227}
1228
Martin v. Löwiscd353062001-05-24 16:56:35 +00001229static PyObject*
1230string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001231{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001232 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001233 Py_ssize_t len_a, len_b;
1234 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001235 PyObject *result;
1236
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001237 /* Make sure both arguments are strings. */
1238 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001239 result = Py_NotImplemented;
1240 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001241 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001242 if (a == b) {
1243 switch (op) {
1244 case Py_EQ:case Py_LE:case Py_GE:
1245 result = Py_True;
1246 goto out;
1247 case Py_NE:case Py_LT:case Py_GT:
1248 result = Py_False;
1249 goto out;
1250 }
1251 }
1252 if (op == Py_EQ) {
1253 /* Supporting Py_NE here as well does not save
1254 much time, since Py_NE is rarely used. */
1255 if (a->ob_size == b->ob_size
1256 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001257 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001258 a->ob_size) == 0)) {
1259 result = Py_True;
1260 } else {
1261 result = Py_False;
1262 }
1263 goto out;
1264 }
1265 len_a = a->ob_size; len_b = b->ob_size;
1266 min_len = (len_a < len_b) ? len_a : len_b;
1267 if (min_len > 0) {
1268 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1269 if (c==0)
1270 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1271 }else
1272 c = 0;
1273 if (c == 0)
1274 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1275 switch (op) {
1276 case Py_LT: c = c < 0; break;
1277 case Py_LE: c = c <= 0; break;
1278 case Py_EQ: assert(0); break; /* unreachable */
1279 case Py_NE: c = c != 0; break;
1280 case Py_GT: c = c > 0; break;
1281 case Py_GE: c = c >= 0; break;
1282 default:
1283 result = Py_NotImplemented;
1284 goto out;
1285 }
1286 result = c ? Py_True : Py_False;
1287 out:
1288 Py_INCREF(result);
1289 return result;
1290}
1291
1292int
1293_PyString_Eq(PyObject *o1, PyObject *o2)
1294{
1295 PyStringObject *a, *b;
1296 a = (PyStringObject*)o1;
1297 b = (PyStringObject*)o2;
1298 return a->ob_size == b->ob_size
1299 && *a->ob_sval == *b->ob_sval
1300 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001301}
1302
Guido van Rossum9bfef441993-03-29 10:43:31 +00001303static long
Fred Drakeba096332000-07-09 07:04:36 +00001304string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001305{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001306 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001307 register unsigned char *p;
1308 register long x;
1309
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001310 if (a->ob_shash != -1)
1311 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001312 len = a->ob_size;
1313 p = (unsigned char *) a->ob_sval;
1314 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001315 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001316 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001317 x ^= a->ob_size;
1318 if (x == -1)
1319 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001320 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001321 return x;
1322}
1323
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001324#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1325
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001326static PyObject*
1327string_subscript(PyStringObject* self, PyObject* item)
1328{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001329 PyNumberMethods *nb = item->ob_type->tp_as_number;
1330 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1331 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001332 if (i == -1 && PyErr_Occurred())
1333 return NULL;
1334 if (i < 0)
1335 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001336 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001337 }
1338 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001339 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001340 char* source_buf;
1341 char* result_buf;
1342 PyObject* result;
1343
Tim Petersae1d0c92006-03-17 03:29:34 +00001344 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001345 PyString_GET_SIZE(self),
1346 &start, &stop, &step, &slicelength) < 0) {
1347 return NULL;
1348 }
1349
1350 if (slicelength <= 0) {
1351 return PyString_FromStringAndSize("", 0);
1352 }
1353 else {
1354 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001355 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001356 if (result_buf == NULL)
1357 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001358
Tim Petersae1d0c92006-03-17 03:29:34 +00001359 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001360 cur += step, i++) {
1361 result_buf[i] = source_buf[cur];
1362 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001363
1364 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001365 slicelength);
1366 PyMem_Free(result_buf);
1367 return result;
1368 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001369 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001370 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001371 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001372 "string indices must be integers");
1373 return NULL;
1374 }
1375}
1376
Martin v. Löwis18e16552006-02-15 17:27:45 +00001377static Py_ssize_t
1378string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001379{
1380 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001381 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001382 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001383 return -1;
1384 }
1385 *ptr = (void *)self->ob_sval;
1386 return self->ob_size;
1387}
1388
Martin v. Löwis18e16552006-02-15 17:27:45 +00001389static Py_ssize_t
1390string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001391{
Guido van Rossum045e6881997-09-08 18:30:11 +00001392 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001393 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001394 return -1;
1395}
1396
Martin v. Löwis18e16552006-02-15 17:27:45 +00001397static Py_ssize_t
1398string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001399{
1400 if ( lenp )
1401 *lenp = self->ob_size;
1402 return 1;
1403}
1404
Martin v. Löwis18e16552006-02-15 17:27:45 +00001405static Py_ssize_t
1406string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001407{
1408 if ( index != 0 ) {
1409 PyErr_SetString(PyExc_SystemError,
1410 "accessing non-existent string segment");
1411 return -1;
1412 }
1413 *ptr = self->ob_sval;
1414 return self->ob_size;
1415}
1416
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001417static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001418 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001419 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001420 (ssizeargfunc)string_repeat, /*sq_repeat*/
1421 (ssizeargfunc)string_item, /*sq_item*/
1422 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001423 0, /*sq_ass_item*/
1424 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001425 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001426};
1427
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001428static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001429 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001430 (binaryfunc)string_subscript,
1431 0,
1432};
1433
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001434static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001435 (readbufferproc)string_buffer_getreadbuf,
1436 (writebufferproc)string_buffer_getwritebuf,
1437 (segcountproc)string_buffer_getsegcount,
1438 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001439};
1440
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441
1442
1443#define LEFTSTRIP 0
1444#define RIGHTSTRIP 1
1445#define BOTHSTRIP 2
1446
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001447/* Arrays indexed by above */
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001448static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1449
1450#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001451
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001452#define SPLIT_APPEND(data, left, right) \
1453 str = PyString_FromStringAndSize((data) + (left), \
1454 (right) - (left)); \
1455 if (str == NULL) \
1456 goto onError; \
1457 if (PyList_Append(list, str)) { \
1458 Py_DECREF(str); \
1459 goto onError; \
1460 } \
1461 else \
1462 Py_DECREF(str);
1463
1464#define SPLIT_INSERT(data, left, right) \
1465 str = PyString_FromStringAndSize((data) + (left), \
1466 (right) - (left)); \
1467 if (str == NULL) \
1468 goto onError; \
1469 if (PyList_Insert(list, 0, str)) { \
1470 Py_DECREF(str); \
1471 goto onError; \
1472 } \
1473 else \
1474 Py_DECREF(str);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475
1476static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001477split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001479 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001480 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 PyObject *list = PyList_New(0);
1482
1483 if (list == NULL)
1484 return NULL;
1485
Guido van Rossum4c08d552000-03-10 22:55:18 +00001486 for (i = j = 0; i < len; ) {
1487 while (i < len && isspace(Py_CHARMASK(s[i])))
1488 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001490 while (i < len && !isspace(Py_CHARMASK(s[i])))
1491 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493 if (maxsplit-- <= 0)
1494 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001495 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001496 while (i < len && isspace(Py_CHARMASK(s[i])))
1497 i++;
1498 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 }
1500 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001501 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001502 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001503 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001505 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 Py_DECREF(list);
1507 return NULL;
1508}
1509
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001511split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001512{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001513 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001514 PyObject *str;
1515 PyObject *list = PyList_New(0);
1516
1517 if (list == NULL)
1518 return NULL;
1519
1520 for (i = j = 0; i < len; ) {
1521 if (s[i] == ch) {
1522 if (maxcount-- <= 0)
1523 break;
1524 SPLIT_APPEND(s, j, i);
1525 i = j = i + 1;
1526 } else
1527 i++;
1528 }
1529 if (j <= len) {
1530 SPLIT_APPEND(s, j, len);
1531 }
1532 return list;
1533
1534 onError:
1535 Py_DECREF(list);
1536 return NULL;
1537}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001538
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001539PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001540"S.split([sep [,maxsplit]]) -> list of strings\n\
1541\n\
1542Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001544splits are done. If sep is not specified or is None, any\n\
1545whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546
1547static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001548string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001550 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1551 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001552 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001553 const char *s = PyString_AS_STRING(self), *sub;
1554 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555
Martin v. Löwis9c830762006-04-13 08:37:17 +00001556 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001557 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001558 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001559 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001560 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001562 if (PyString_Check(subobj)) {
1563 sub = PyString_AS_STRING(subobj);
1564 n = PyString_GET_SIZE(subobj);
1565 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001566#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 else if (PyUnicode_Check(subobj))
1568 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001569#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001570 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1571 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001572
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573 if (n == 0) {
1574 PyErr_SetString(PyExc_ValueError, "empty separator");
1575 return NULL;
1576 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001577 else if (n == 1)
1578 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579
1580 list = PyList_New(0);
1581 if (list == NULL)
1582 return NULL;
1583
1584 i = j = 0;
1585 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001586 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001587 if (maxsplit-- <= 0)
1588 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001589 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590 if (item == NULL)
1591 goto fail;
1592 err = PyList_Append(list, item);
1593 Py_DECREF(item);
1594 if (err < 0)
1595 goto fail;
1596 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597 }
1598 else
1599 i++;
1600 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001601 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 if (item == NULL)
1603 goto fail;
1604 err = PyList_Append(list, item);
1605 Py_DECREF(item);
1606 if (err < 0)
1607 goto fail;
1608
1609 return list;
1610
1611 fail:
1612 Py_DECREF(list);
1613 return NULL;
1614}
1615
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001616static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001617rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001618{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001619 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001620 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 PyObject *list = PyList_New(0);
1622
1623 if (list == NULL)
1624 return NULL;
1625
1626 for (i = j = len - 1; i >= 0; ) {
1627 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1628 i--;
1629 j = i;
1630 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1631 i--;
1632 if (j > i) {
1633 if (maxsplit-- <= 0)
1634 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001635 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001636 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1637 i--;
1638 j = i;
1639 }
1640 }
1641 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001642 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001643 }
1644 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001645 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001646 Py_DECREF(list);
1647 return NULL;
1648}
1649
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001651rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001652{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001653 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001654 PyObject *str;
1655 PyObject *list = PyList_New(0);
1656
1657 if (list == NULL)
1658 return NULL;
1659
1660 for (i = j = len - 1; i >= 0; ) {
1661 if (s[i] == ch) {
1662 if (maxcount-- <= 0)
1663 break;
1664 SPLIT_INSERT(s, i + 1, j + 1);
1665 j = i = i - 1;
1666 } else
1667 i--;
1668 }
1669 if (j >= -1) {
1670 SPLIT_INSERT(s, 0, j + 1);
1671 }
1672 return list;
1673
1674 onError:
1675 Py_DECREF(list);
1676 return NULL;
1677}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678
1679PyDoc_STRVAR(rsplit__doc__,
1680"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1681\n\
1682Return a list of the words in the string S, using sep as the\n\
1683delimiter string, starting at the end of the string and working\n\
1684to the front. If maxsplit is given, at most maxsplit splits are\n\
1685done. If sep is not specified or is None, any whitespace string\n\
1686is a separator.");
1687
1688static PyObject *
1689string_rsplit(PyStringObject *self, PyObject *args)
1690{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001691 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1692 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001693 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 const char *s = PyString_AS_STRING(self), *sub;
1695 PyObject *list, *item, *subobj = Py_None;
1696
Martin v. Löwis9c830762006-04-13 08:37:17 +00001697 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698 return NULL;
1699 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001700 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 if (subobj == Py_None)
1702 return rsplit_whitespace(s, len, maxsplit);
1703 if (PyString_Check(subobj)) {
1704 sub = PyString_AS_STRING(subobj);
1705 n = PyString_GET_SIZE(subobj);
1706 }
1707#ifdef Py_USING_UNICODE
1708 else if (PyUnicode_Check(subobj))
1709 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1710#endif
1711 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1712 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001713
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 if (n == 0) {
1715 PyErr_SetString(PyExc_ValueError, "empty separator");
1716 return NULL;
1717 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001718 else if (n == 1)
1719 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720
1721 list = PyList_New(0);
1722 if (list == NULL)
1723 return NULL;
1724
1725 j = len;
1726 i = j - n;
1727 while (i >= 0) {
1728 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1729 if (maxsplit-- <= 0)
1730 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001731 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001732 if (item == NULL)
1733 goto fail;
1734 err = PyList_Insert(list, 0, item);
1735 Py_DECREF(item);
1736 if (err < 0)
1737 goto fail;
1738 j = i;
1739 i -= n;
1740 }
1741 else
1742 i--;
1743 }
1744 item = PyString_FromStringAndSize(s, j);
1745 if (item == NULL)
1746 goto fail;
1747 err = PyList_Insert(list, 0, item);
1748 Py_DECREF(item);
1749 if (err < 0)
1750 goto fail;
1751
1752 return list;
1753
1754 fail:
1755 Py_DECREF(list);
1756 return NULL;
1757}
1758
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001760PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761"S.join(sequence) -> string\n\
1762\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001763Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001764sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765
1766static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001767string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768{
1769 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001770 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001773 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001775 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001776 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777
Tim Peters19fe14e2001-01-19 03:03:47 +00001778 seq = PySequence_Fast(orig, "");
1779 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001780 return NULL;
1781 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001782
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001783 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001784 if (seqlen == 0) {
1785 Py_DECREF(seq);
1786 return PyString_FromString("");
1787 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001789 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001790 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1791 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001793 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001796
Raymond Hettinger674f2412004-08-23 23:23:54 +00001797 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001798 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001799 * Do a pre-pass to figure out the total amount of space we'll
1800 * need (sz), see whether any argument is absurd, and defer to
1801 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001802 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001803 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001804 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001805 item = PySequence_Fast_GET_ITEM(seq, i);
1806 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001807#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001808 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001809 /* Defer to Unicode join.
1810 * CAUTION: There's no gurantee that the
1811 * original sequence can be iterated over
1812 * again, so we must pass seq here.
1813 */
1814 PyObject *result;
1815 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001816 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001817 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001818 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001819#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001820 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001821 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001822 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001823 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001824 Py_DECREF(seq);
1825 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001827 sz += PyString_GET_SIZE(item);
1828 if (i != 0)
1829 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001830 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001831 PyErr_SetString(PyExc_OverflowError,
1832 "join() is too long for a Python string");
1833 Py_DECREF(seq);
1834 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001836 }
1837
1838 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001839 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001840 if (res == NULL) {
1841 Py_DECREF(seq);
1842 return NULL;
1843 }
1844
1845 /* Catenate everything. */
1846 p = PyString_AS_STRING(res);
1847 for (i = 0; i < seqlen; ++i) {
1848 size_t n;
1849 item = PySequence_Fast_GET_ITEM(seq, i);
1850 n = PyString_GET_SIZE(item);
1851 memcpy(p, PyString_AS_STRING(item), n);
1852 p += n;
1853 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001854 memcpy(p, sep, seplen);
1855 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001856 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001858
Jeremy Hylton49048292000-07-11 03:28:17 +00001859 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861}
1862
Tim Peters52e155e2001-06-16 05:42:57 +00001863PyObject *
1864_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001865{
Tim Petersa7259592001-06-16 05:11:17 +00001866 assert(sep != NULL && PyString_Check(sep));
1867 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001868 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001869}
1870
/* Normalise a (start, end) pair in place against a length `len`:
 *  - an end beyond len is clipped to len;
 *  - a negative end or start has len added to it, and is clamped to 0
 *    if it is still negative.
 * A start greater than len is left untouched.
 */
static void
string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
{
    Py_ssize_t hi = *end;
    Py_ssize_t lo = *start;

    if (hi > len) {
        hi = len;
    }
    else if (hi < 0) {
        hi += len;
    }
    if (hi < 0) {
        hi = 0;
    }

    if (lo < 0) {
        lo += len;
    }
    if (lo < 0) {
        lo = 0;
    }

    *end = hi;
    *start = lo;
}
1885
Martin v. Löwis18e16552006-02-15 17:27:45 +00001886static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001887string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001889 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001890 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001891 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001892 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893
Martin v. Löwis18e16552006-02-15 17:27:45 +00001894 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001895 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001896 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001897 return -2;
1898 if (PyString_Check(subobj)) {
1899 sub = PyString_AS_STRING(subobj);
1900 n = PyString_GET_SIZE(subobj);
1901 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001902#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001903 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001904 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001905#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001906 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907 return -2;
1908
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001909 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001911#ifdef USE_FAST
1912 if (n == 0)
1913 return (dir > 0) ? i : last;
1914 if (dir > 0) {
1915 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1916 FAST_SEARCH);
1917 if (pos < 0)
1918 return pos;
1919 return pos + i;
1920 }
1921#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001922 if (dir > 0) {
1923 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925 last -= n;
1926 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001927 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928 return (long)i;
1929 }
1930 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001931 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001932
Guido van Rossum4c08d552000-03-10 22:55:18 +00001933 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001934 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001935 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001936 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001937 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001938 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001939
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940 return -1;
1941}
1942
1943
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001944PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945"S.find(sub [,start [,end]]) -> int\n\
1946\n\
1947Return the lowest index in S where substring sub is found,\n\
1948such that sub is contained within s[start,end]. Optional\n\
1949arguments start and end are interpreted as in slice notation.\n\
1950\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001951Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952
1953static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001954string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001956 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957 if (result == -2)
1958 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001959 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960}
1961
1962
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001963PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964"S.index(sub [,start [,end]]) -> int\n\
1965\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001966Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967
1968static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001969string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001971 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 if (result == -2)
1973 return NULL;
1974 if (result == -1) {
1975 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001976 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977 return NULL;
1978 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001979 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980}
1981
1982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001983PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984"S.rfind(sub [,start [,end]]) -> int\n\
1985\n\
1986Return the highest index in S where substring sub is found,\n\
1987such that sub is contained within s[start,end]. Optional\n\
1988arguments start and end are interpreted as in slice notation.\n\
1989\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001990Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991
1992static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001993string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001995 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996 if (result == -2)
1997 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001998 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999}
2000
2001
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002002PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003"S.rindex(sub [,start [,end]]) -> int\n\
2004\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002005Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006
2007static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002008string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002010 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011 if (result == -2)
2012 return NULL;
2013 if (result == -1) {
2014 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002015 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016 return NULL;
2017 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002018 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019}
2020
2021
2022static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002023do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2024{
2025 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002026 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002027 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002028 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2029 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002030
2031 i = 0;
2032 if (striptype != RIGHTSTRIP) {
2033 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2034 i++;
2035 }
2036 }
2037
2038 j = len;
2039 if (striptype != LEFTSTRIP) {
2040 do {
2041 j--;
2042 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2043 j++;
2044 }
2045
2046 if (i == 0 && j == len && PyString_CheckExact(self)) {
2047 Py_INCREF(self);
2048 return (PyObject*)self;
2049 }
2050 else
2051 return PyString_FromStringAndSize(s+i, j-i);
2052}
2053
2054
2055static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002056do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057{
2058 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002059 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061 i = 0;
2062 if (striptype != RIGHTSTRIP) {
2063 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2064 i++;
2065 }
2066 }
2067
2068 j = len;
2069 if (striptype != LEFTSTRIP) {
2070 do {
2071 j--;
2072 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2073 j++;
2074 }
2075
Tim Peters8fa5dd02001-09-12 02:18:30 +00002076 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077 Py_INCREF(self);
2078 return (PyObject*)self;
2079 }
2080 else
2081 return PyString_FromStringAndSize(s+i, j-i);
2082}
2083
2084
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002085static PyObject *
2086do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2087{
2088 PyObject *sep = NULL;
2089
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002090 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002091 return NULL;
2092
2093 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002094 if (PyString_Check(sep))
2095 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002096#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002097 else if (PyUnicode_Check(sep)) {
2098 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2099 PyObject *res;
2100 if (uniself==NULL)
2101 return NULL;
2102 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2103 striptype, sep);
2104 Py_DECREF(uniself);
2105 return res;
2106 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002107#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002108 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002109#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002110 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002111#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002112 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002113#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002114 STRIPNAME(striptype));
2115 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002116 }
2117
2118 return do_strip(self, striptype);
2119}
2120
2121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002122PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002123"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124\n\
2125Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002126whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002127If chars is given and not None, remove characters in chars instead.\n\
2128If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129
2130static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002131string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002133 if (PyTuple_GET_SIZE(args) == 0)
2134 return do_strip(self, BOTHSTRIP); /* Common case */
2135 else
2136 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137}
2138
2139
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002140PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002141"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002143Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002144If chars is given and not None, remove characters in chars instead.\n\
2145If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146
2147static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002148string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002150 if (PyTuple_GET_SIZE(args) == 0)
2151 return do_strip(self, LEFTSTRIP); /* Common case */
2152 else
2153 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154}
2155
2156
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002158"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002160Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002161If chars is given and not None, remove characters in chars instead.\n\
2162If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163
2164static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002165string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002167 if (PyTuple_GET_SIZE(args) == 0)
2168 return do_strip(self, RIGHTSTRIP); /* Common case */
2169 else
2170 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171}
2172
2173
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002174PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175"S.lower() -> string\n\
2176\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002177Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002179/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2180#ifndef _tolower
2181#define _tolower tolower
2182#endif
2183
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002185string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002187 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002188 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002189 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002191 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002192 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002194
2195 s = PyString_AS_STRING(newobj);
2196
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002197 memcpy(s, PyString_AS_STRING(self), n);
2198
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002200 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002201 if (isupper(c))
2202 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002204
Anthony Baxtera6286212006-04-11 07:42:36 +00002205 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206}
2207
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002208PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209"S.upper() -> string\n\
2210\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002211Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002213#ifndef _toupper
2214#define _toupper toupper
2215#endif
2216
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002218string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002220 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002221 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002222 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002224 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002225 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002227
2228 s = PyString_AS_STRING(newobj);
2229
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002230 memcpy(s, PyString_AS_STRING(self), n);
2231
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002233 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002234 if (islower(c))
2235 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002237
Anthony Baxtera6286212006-04-11 07:42:36 +00002238 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239}
2240
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002241PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242"S.title() -> string\n\
2243\n\
2244Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002245characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246
2247static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002248string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249{
2250 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002251 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002252 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002253 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254
Anthony Baxtera6286212006-04-11 07:42:36 +00002255 newobj = PyString_FromStringAndSize(NULL, n);
2256 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002258 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 for (i = 0; i < n; i++) {
2260 int c = Py_CHARMASK(*s++);
2261 if (islower(c)) {
2262 if (!previous_is_cased)
2263 c = toupper(c);
2264 previous_is_cased = 1;
2265 } else if (isupper(c)) {
2266 if (previous_is_cased)
2267 c = tolower(c);
2268 previous_is_cased = 1;
2269 } else
2270 previous_is_cased = 0;
2271 *s_new++ = c;
2272 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002273 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274}
2275
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002276PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277"S.capitalize() -> string\n\
2278\n\
2279Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002280capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281
2282static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002283string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002284{
2285 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002286 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002287 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288
Anthony Baxtera6286212006-04-11 07:42:36 +00002289 newobj = PyString_FromStringAndSize(NULL, n);
2290 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002292 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002293 if (0 < n) {
2294 int c = Py_CHARMASK(*s++);
2295 if (islower(c))
2296 *s_new = toupper(c);
2297 else
2298 *s_new = c;
2299 s_new++;
2300 }
2301 for (i = 1; i < n; i++) {
2302 int c = Py_CHARMASK(*s++);
2303 if (isupper(c))
2304 *s_new = tolower(c);
2305 else
2306 *s_new = c;
2307 s_new++;
2308 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002309 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310}
2311
2312
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002313PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314"S.count(sub[, start[, end]]) -> int\n\
2315\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002316Return the number of non-overlapping occurrences of substring sub in\n\
2317string S[start:end]. Optional arguments start and end are interpreted\n\
2318as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319
2320static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002321string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002322{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002323 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002324 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002325 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002326 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328
Guido van Rossumc6821402000-05-08 14:08:05 +00002329 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2330 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002332
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 if (PyString_Check(subobj)) {
2334 sub = PyString_AS_STRING(subobj);
2335 n = PyString_GET_SIZE(subobj);
2336 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002337#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002338 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002339 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002340 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2341 if (count == -1)
2342 return NULL;
2343 else
2344 return PyInt_FromLong((long) count);
2345 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002346#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002347 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2348 return NULL;
2349
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002350 string_adjust_indices(&i, &last, len);
2351
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 m = last + 1 - n;
2353 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002354 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355
Fredrik Lundhaf722372006-05-25 17:55:31 +00002356#ifdef USE_FAST
2357 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2358 if (r < 0)
2359 r = 0; /* no match */
2360#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361 r = 0;
2362 while (i < m) {
Fredrik Lundhaf722372006-05-25 17:55:31 +00002363 const char *t
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364 if (!memcmp(s+i, sub, n)) {
2365 r++;
2366 i += n;
2367 } else {
2368 i++;
2369 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002370 if (i >= m)
2371 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002372 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002373 if (t == NULL)
2374 break;
2375 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002377#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002378 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379}
2380
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002381PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382"S.swapcase() -> string\n\
2383\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002385converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386
2387static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002388string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002389{
2390 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002391 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002392 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002393
Anthony Baxtera6286212006-04-11 07:42:36 +00002394 newobj = PyString_FromStringAndSize(NULL, n);
2395 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002396 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002397 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398 for (i = 0; i < n; i++) {
2399 int c = Py_CHARMASK(*s++);
2400 if (islower(c)) {
2401 *s_new = toupper(c);
2402 }
2403 else if (isupper(c)) {
2404 *s_new = tolower(c);
2405 }
2406 else
2407 *s_new = c;
2408 s_new++;
2409 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002410 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411}
2412
2413
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002414PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415"S.translate(table [,deletechars]) -> string\n\
2416\n\
2417Return a copy of the string S, where all characters occurring\n\
2418in the optional argument deletechars are removed, and the\n\
2419remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002420translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421
2422static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002423string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425 register char *input, *output;
2426 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002427 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002430 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431 PyObject *result;
2432 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002435 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002438
2439 if (PyString_Check(tableobj)) {
2440 table1 = PyString_AS_STRING(tableobj);
2441 tablen = PyString_GET_SIZE(tableobj);
2442 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002443#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002445 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002446 parameter; instead a mapping to None will cause characters
2447 to be deleted. */
2448 if (delobj != NULL) {
2449 PyErr_SetString(PyExc_TypeError,
2450 "deletions are implemented differently for unicode");
2451 return NULL;
2452 }
2453 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2454 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002455#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002456 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002457 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002458
Martin v. Löwis00b61272002-12-12 20:03:19 +00002459 if (tablen != 256) {
2460 PyErr_SetString(PyExc_ValueError,
2461 "translation table must be 256 characters long");
2462 return NULL;
2463 }
2464
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 if (delobj != NULL) {
2466 if (PyString_Check(delobj)) {
2467 del_table = PyString_AS_STRING(delobj);
2468 dellen = PyString_GET_SIZE(delobj);
2469 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002470#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002471 else if (PyUnicode_Check(delobj)) {
2472 PyErr_SetString(PyExc_TypeError,
2473 "deletions are implemented differently for unicode");
2474 return NULL;
2475 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002476#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002477 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2478 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002479 }
2480 else {
2481 del_table = NULL;
2482 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483 }
2484
2485 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002486 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002487 result = PyString_FromStringAndSize((char *)NULL, inlen);
2488 if (result == NULL)
2489 return NULL;
2490 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002491 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492
2493 if (dellen == 0) {
2494 /* If no deletions are required, use faster code */
2495 for (i = inlen; --i >= 0; ) {
2496 c = Py_CHARMASK(*input++);
2497 if (Py_CHARMASK((*output++ = table[c])) != c)
2498 changed = 1;
2499 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002500 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501 return result;
2502 Py_DECREF(result);
2503 Py_INCREF(input_obj);
2504 return input_obj;
2505 }
2506
2507 for (i = 0; i < 256; i++)
2508 trans_table[i] = Py_CHARMASK(table[i]);
2509
2510 for (i = 0; i < dellen; i++)
2511 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2512
2513 for (i = inlen; --i >= 0; ) {
2514 c = Py_CHARMASK(*input++);
2515 if (trans_table[c] != -1)
2516 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2517 continue;
2518 changed = 1;
2519 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002520 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521 Py_DECREF(result);
2522 Py_INCREF(input_obj);
2523 return input_obj;
2524 }
2525 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002526 if (inlen > 0)
2527 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002528 return result;
2529}
2530
2531
/* Search directions accepted by findstring() and countstring().  REVERSE
   is parenthesized so that it expands safely inside any expression. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Non-zero when the `length` bytes at target+offset equal the first
   `length` bytes of pattern.  The first and last bytes are compared
   before memcmp() is run on the length-2 bytes in between.
   Don't call if length < 2 (the memcmp size would be negative).
   Every argument is evaluated more than once, so pass expressions
   without side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* Pointer to the first occurrence of byte c within the first target_len
   bytes of target, or NULL when there is none (thin wrapper over memchr). */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2545
2546/* String ops must return a string. */
2547/* If the object is subclass of string, create a copy */
2548static PyStringObject *
2549return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002550{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002551 if (PyString_CheckExact(self)) {
2552 Py_INCREF(self);
2553 return self;
2554 }
2555 return (PyStringObject *)PyString_FromStringAndSize(
2556 PyString_AS_STRING(self),
2557 PyString_GET_SIZE(self));
2558}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002559
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002560static Py_ssize_t
2561countchar(char *target, int target_len, char c)
2562{
2563 Py_ssize_t count=0;
2564 char *start=target;
2565 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002567 while ( (start=findchar(start, end-start, c)) != NULL ) {
2568 count++;
2569 start += 1;
2570 }
2571
2572 return count;
2573}
2574
2575static Py_ssize_t
2576findstring(char *target, Py_ssize_t target_len,
2577 char *pattern, Py_ssize_t pattern_len,
2578 Py_ssize_t start,
2579 Py_ssize_t end,
2580 int direction)
2581{
2582 if (start < 0) {
2583 start += target_len;
2584 if (start < 0)
2585 start = 0;
2586 }
2587 if (end > target_len) {
2588 end = target_len;
2589 } else if (end < 0) {
2590 end += target_len;
2591 if (end < 0)
2592 end = 0;
2593 }
2594
2595 /* zero-length substrings always match at the first attempt */
2596 if (pattern_len == 0)
2597 return (direction > 0) ? start : end;
2598
2599 end -= pattern_len;
2600
2601 if (direction < 0) {
2602 for (; end >= start; end--)
2603 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2604 return end;
2605 } else {
2606 for (; start <= end; start++)
2607 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2608 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002609 }
2610 return -1;
2611}
2612
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002613Py_ssize_t
2614countstring(char *target, Py_ssize_t target_len,
2615 char *pattern, Py_ssize_t pattern_len,
2616 Py_ssize_t start,
2617 Py_ssize_t end,
2618 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002620 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002621
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002622 if (start < 0) {
2623 start += target_len;
2624 if (start < 0)
2625 start = 0;
2626 }
2627 if (end > target_len) {
2628 end = target_len;
2629 } else if (end < 0) {
2630 end += target_len;
2631 if (end < 0)
2632 end = 0;
2633 }
2634
2635 /* zero-length substrings match everywhere */
2636 if (pattern_len == 0)
2637 return target_len+1;
2638
2639 end -= pattern_len;
2640
2641 if (direction < 0) {
2642 for (; end >= start; end--)
2643 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2644 count++;
2645 end -= pattern_len-1;
2646 }
2647 } else {
2648 for (; start <= end; start++)
2649 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2650 count++;
2651 start += pattern_len-1;
2652 }
2653 }
2654 return count;
2655}
2656
2657
/* Algorithms for different cases of string replacement */
2659
2660/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2661static PyStringObject *
2662replace_interleave(PyStringObject *self,
2663 PyStringObject *to,
2664 Py_ssize_t maxcount)
2665{
2666 char *self_s, *to_s, *result_s;
2667 Py_ssize_t self_len, to_len, result_len;
2668 Py_ssize_t count, i, product;
2669 PyStringObject *result;
2670
2671 self_len = PyString_GET_SIZE(self);
2672 to_len = PyString_GET_SIZE(to);
2673
2674 /* 1 at the end plus 1 after every character */
2675 count = self_len+1;
2676 if (maxcount < count)
2677 count = maxcount;
2678
2679 /* Check for overflow */
2680 /* result_len = count * to_len + self_len; */
2681 product = count * to_len;
2682 if (product / to_len != count) {
2683 PyErr_SetString(PyExc_OverflowError,
2684 "replace string is too long");
2685 return NULL;
2686 }
2687 result_len = product + self_len;
2688 if (result_len < 0) {
2689 PyErr_SetString(PyExc_OverflowError,
2690 "replace string is too long");
2691 return NULL;
2692 }
2693
2694 if (! (result = (PyStringObject *)
2695 PyString_FromStringAndSize(NULL, result_len)) )
2696 return NULL;
2697
2698 self_s = PyString_AS_STRING(self);
2699 to_s = PyString_AS_STRING(to);
2700 to_len = PyString_GET_SIZE(to);
2701 result_s = PyString_AS_STRING(result);
2702
2703 /* TODO: special case single character, which doesn't need memcpy */
2704
2705 /* Lay the first one down (guaranteed this will occur) */
2706 memcpy(result_s, to_s, to_len);
2707 result_s += to_len;
2708 count -= 1;
2709
2710 for (i=0; i<count; i++) {
2711 *result_s++ = *self_s++;
2712 memcpy(result_s, to_s, to_len);
2713 result_s += to_len;
2714 }
2715
2716 /* Copy the rest of the original string */
2717 memcpy(result_s, self_s, self_len-i);
2718
2719 return result;
2720}
2721
2722/* Special case for deleting a single character */
2723/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2724static PyStringObject *
2725replace_delete_single_character(PyStringObject *self,
2726 char from_c, Py_ssize_t maxcount)
2727{
2728 char *self_s, *result_s;
2729 char *start, *next, *end;
2730 Py_ssize_t self_len, result_len;
2731 Py_ssize_t count;
2732 PyStringObject *result;
2733
2734 self_len = PyString_GET_SIZE(self);
2735 self_s = PyString_AS_STRING(self);
2736
2737 count = countchar(self_s, self_len, from_c);
2738 if (count == 0) {
2739 return return_self(self);
2740 }
2741 if (count > maxcount)
2742 count = maxcount;
2743
2744 result_len = self_len - count; /* from_len == 1 */
2745 assert(result_len>=0);
2746
2747 if ( (result = (PyStringObject *)
2748 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2749 return NULL;
2750 result_s = PyString_AS_STRING(result);
2751
2752 start = self_s;
2753 end = self_s + self_len;
2754 while (count-- > 0) {
2755 next = findchar(start, end-start, from_c);
2756 if (next == NULL)
2757 break;
2758 memcpy(result_s, start, next-start);
2759 result_s += (next-start);
2760 start = next+1;
2761 }
2762 memcpy(result_s, start, end-start);
2763
2764 return result;
2765}
2766
2767/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2768
2769static PyStringObject *
2770replace_delete_substring(PyStringObject *self, PyStringObject *from,
2771 Py_ssize_t maxcount) {
2772 char *self_s, *from_s, *result_s;
2773 char *start, *next, *end;
2774 Py_ssize_t self_len, from_len, result_len;
2775 Py_ssize_t count, offset;
2776 PyStringObject *result;
2777
2778 self_len = PyString_GET_SIZE(self);
2779 self_s = PyString_AS_STRING(self);
2780 from_len = PyString_GET_SIZE(from);
2781 from_s = PyString_AS_STRING(from);
2782
2783 count = countstring(self_s, self_len,
2784 from_s, from_len,
2785 0, self_len, 1);
2786
2787 if (count > maxcount)
2788 count = maxcount;
2789
2790 if (count == 0) {
2791 /* no matches */
2792 return return_self(self);
2793 }
2794
2795 result_len = self_len - (count * from_len);
2796 assert (result_len>=0);
2797
2798 if ( (result = (PyStringObject *)
2799 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2800 return NULL;
2801
2802 result_s = PyString_AS_STRING(result);
2803
2804 start = self_s;
2805 end = self_s + self_len;
2806 while (count-- > 0) {
2807 offset = findstring(start, end-start,
2808 from_s, from_len,
2809 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002810 if (offset == -1)
2811 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812 next = start + offset;
2813
2814 memcpy(result_s, start, next-start);
2815
2816 result_s += (next-start);
2817 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002818 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 memcpy(result_s, start, end-start);
2820 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002821}
2822
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002823/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2824static PyStringObject *
2825replace_single_character_in_place(PyStringObject *self,
2826 char from_c, char to_c,
2827 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002828{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002829 char *self_s, *result_s, *start, *end, *next;
2830 Py_ssize_t self_len;
2831 PyStringObject *result;
2832
2833 /* The result string will be the same size */
2834 self_s = PyString_AS_STRING(self);
2835 self_len = PyString_GET_SIZE(self);
2836
2837 next = findchar(self_s, self_len, from_c);
2838
2839 if (next == NULL) {
2840 /* No matches; return the original string */
2841 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002842 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002843
2844 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002845 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002846 if (result == NULL)
2847 return NULL;
2848 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002849 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002850
2851 /* change everything in-place, starting with this one */
2852 start = result_s + (next-self_s);
2853 *start = to_c;
2854 start++;
2855 end = result_s + self_len;
2856
2857 while (--maxcount > 0) {
2858 next = findchar(start, end-start, from_c);
2859 if (next == NULL)
2860 break;
2861 *next = to_c;
2862 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002863 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864
2865 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002866}
2867
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002868/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2869static PyStringObject *
2870replace_substring_in_place(PyStringObject *self,
2871 PyStringObject *from,
2872 PyStringObject *to,
2873 Py_ssize_t maxcount)
2874{
2875 char *result_s, *start, *end;
2876 char *self_s, *from_s, *to_s;
2877 Py_ssize_t self_len, from_len, offset;
2878 PyStringObject *result;
2879
2880 /* The result string will be the same size */
2881
2882 self_s = PyString_AS_STRING(self);
2883 self_len = PyString_GET_SIZE(self);
2884
2885 from_s = PyString_AS_STRING(from);
2886 from_len = PyString_GET_SIZE(from);
2887 to_s = PyString_AS_STRING(to);
2888
2889 offset = findstring(self_s, self_len,
2890 from_s, from_len,
2891 0, self_len, FORWARD);
2892
2893 if (offset == -1) {
2894 /* No matches; return the original string */
2895 return return_self(self);
2896 }
2897
2898 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002899 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002900 if (result == NULL)
2901 return NULL;
2902 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002903 memcpy(result_s, self_s, self_len);
2904
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002905
2906 /* change everything in-place, starting with this one */
2907 start = result_s + offset;
2908 memcpy(start, to_s, from_len);
2909 start += from_len;
2910 end = result_s + self_len;
2911
2912 while ( --maxcount > 0) {
2913 offset = findstring(start, end-start,
2914 from_s, from_len,
2915 0, end-start, FORWARD);
2916 if (offset==-1)
2917 break;
2918 memcpy(start+offset, to_s, from_len);
2919 start += offset+from_len;
2920 }
2921
2922 return result;
2923}
2924
2925/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2926static PyStringObject *
2927replace_single_character(PyStringObject *self,
2928 char from_c,
2929 PyStringObject *to,
2930 Py_ssize_t maxcount)
2931{
2932 char *self_s, *to_s, *result_s;
2933 char *start, *next, *end;
2934 Py_ssize_t self_len, to_len, result_len;
2935 Py_ssize_t count, product;
2936 PyStringObject *result;
2937
2938 self_s = PyString_AS_STRING(self);
2939 self_len = PyString_GET_SIZE(self);
2940
2941 count = countchar(self_s, self_len, from_c);
2942 if (count > maxcount)
2943 count = maxcount;
2944
2945 if (count == 0) {
2946 /* no matches, return unchanged */
2947 return return_self(self);
2948 }
2949
2950 to_s = PyString_AS_STRING(to);
2951 to_len = PyString_GET_SIZE(to);
2952
2953 /* use the difference between current and new, hence the "-1" */
2954 /* result_len = self_len + count * (to_len-1) */
2955 product = count * (to_len-1);
2956 if (product / (to_len-1) != count) {
2957 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2958 return NULL;
2959 }
2960 result_len = self_len + product;
2961 if (result_len < 0) {
2962 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2963 return NULL;
2964 }
2965
2966 if ( (result = (PyStringObject *)
2967 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2968 return NULL;
2969 result_s = PyString_AS_STRING(result);
2970
2971 start = self_s;
2972 end = self_s + self_len;
2973 while (count-- > 0) {
2974 next = findchar(start, end-start, from_c);
2975 if (next == NULL)
2976 break;
2977
2978 if (next == start) {
2979 /* replace with the 'to' */
2980 memcpy(result_s, to_s, to_len);
2981 result_s += to_len;
2982 start += 1;
2983 } else {
2984 /* copy the unchanged old then the 'to' */
2985 memcpy(result_s, start, next-start);
2986 result_s += (next-start);
2987 memcpy(result_s, to_s, to_len);
2988 result_s += to_len;
2989 start = next+1;
2990 }
2991 }
2992 /* Copy the remainder of the remaining string */
2993 memcpy(result_s, start, end-start);
2994
2995 return result;
2996}
2997
2998/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2999static PyStringObject *
3000replace_substring(PyStringObject *self,
3001 PyStringObject *from,
3002 PyStringObject *to,
3003 Py_ssize_t maxcount) {
3004 char *self_s, *from_s, *to_s, *result_s;
3005 char *start, *next, *end;
3006 Py_ssize_t self_len, from_len, to_len, result_len;
3007 Py_ssize_t count, offset, product;
3008 PyStringObject *result;
3009
3010 self_s = PyString_AS_STRING(self);
3011 self_len = PyString_GET_SIZE(self);
3012 from_s = PyString_AS_STRING(from);
3013 from_len = PyString_GET_SIZE(from);
3014
3015 count = countstring(self_s, self_len,
3016 from_s, from_len,
3017 0, self_len, FORWARD);
3018 if (count > maxcount)
3019 count = maxcount;
3020
3021 if (count == 0) {
3022 /* no matches, return unchanged */
3023 return return_self(self);
3024 }
3025
3026 to_s = PyString_AS_STRING(to);
3027 to_len = PyString_GET_SIZE(to);
3028
3029 /* Check for overflow */
3030 /* result_len = self_len + count * (to_len-from_len) */
3031 product = count * (to_len-from_len);
3032 if (product / (to_len-from_len) != count) {
3033 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3034 return NULL;
3035 }
3036 result_len = self_len + product;
3037 if (result_len < 0) {
3038 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3039 return NULL;
3040 }
3041
3042 if ( (result = (PyStringObject *)
3043 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3044 return NULL;
3045 result_s = PyString_AS_STRING(result);
3046
3047 start = self_s;
3048 end = self_s + self_len;
3049 while (count-- > 0) {
3050 offset = findstring(start, end-start,
3051 from_s, from_len,
3052 0, end-start, FORWARD);
3053 if (offset == -1)
3054 break;
3055 next = start+offset;
3056 if (next == start) {
3057 /* replace with the 'to' */
3058 memcpy(result_s, to_s, to_len);
3059 result_s += to_len;
3060 start += from_len;
3061 } else {
3062 /* copy the unchanged old then the 'to' */
3063 memcpy(result_s, start, next-start);
3064 result_s += (next-start);
3065 memcpy(result_s, to_s, to_len);
3066 result_s += to_len;
3067 start = next+from_len;
3068 }
3069 }
3070 /* Copy the remainder of the remaining string */
3071 memcpy(result_s, start, end-start);
3072
3073 return result;
3074}
3075
3076
3077static PyStringObject *
3078replace(PyStringObject *self,
3079 PyStringObject *from,
3080 PyStringObject *to,
3081 Py_ssize_t maxcount)
3082{
3083 Py_ssize_t from_len, to_len;
3084
3085 if (maxcount < 0) {
3086 maxcount = PY_SSIZE_T_MAX;
3087 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3088 /* nothing to do; return the original string */
3089 return return_self(self);
3090 }
3091
3092 from_len = PyString_GET_SIZE(from);
3093 to_len = PyString_GET_SIZE(to);
3094
3095 if (maxcount == 0 ||
3096 (from_len == 0 && to_len == 0)) {
3097 /* nothing to do; return the original string */
3098 return return_self(self);
3099 }
3100
3101 /* Handle zero-length special cases */
3102
3103 if (from_len == 0) {
3104 /* insert the 'to' string everywhere. */
3105 /* >>> "Python".replace("", ".") */
3106 /* '.P.y.t.h.o.n.' */
3107 return replace_interleave(self, to, maxcount);
3108 }
3109
3110 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3111 /* point for an empty self string to generate a non-empty string */
3112 /* Special case so the remaining code always gets a non-empty string */
3113 if (PyString_GET_SIZE(self) == 0) {
3114 return return_self(self);
3115 }
3116
3117 if (to_len == 0) {
3118 /* delete all occurances of 'from' string */
3119 if (from_len == 1) {
3120 return replace_delete_single_character(
3121 self, PyString_AS_STRING(from)[0], maxcount);
3122 } else {
3123 return replace_delete_substring(self, from, maxcount);
3124 }
3125 }
3126
3127 /* Handle special case where both strings have the same length */
3128
3129 if (from_len == to_len) {
3130 if (from_len == 1) {
3131 return replace_single_character_in_place(
3132 self,
3133 PyString_AS_STRING(from)[0],
3134 PyString_AS_STRING(to)[0],
3135 maxcount);
3136 } else {
3137 return replace_substring_in_place(
3138 self, from, to, maxcount);
3139 }
3140 }
3141
3142 /* Otherwise use the more generic algorithms */
3143 if (from_len == 1) {
3144 return replace_single_character(self, PyString_AS_STRING(from)[0],
3145 to, maxcount);
3146 } else {
3147 /* len('from')>=2, len('to')>=1 */
3148 return replace_substring(self, from, to, maxcount);
3149 }
3150}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003151
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003152PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003153"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003154\n\
3155Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003156old replaced by new. If the optional argument count is\n\
3157given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158
3159static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003160string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003161{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003162 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003163 PyObject *from, *to;
3164 char *tmp_s;
3165 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003166
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003167 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003168 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003170 if (PyString_Check(from)) {
3171 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003172 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003173#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003174 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003175 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003176 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003177#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003178 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003179 return NULL;
3180
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003181 if (PyString_Check(to)) {
3182 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003183 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003184#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003185 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003186 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003187 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003188#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003189 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003190 return NULL;
3191
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003192 return (PyObject *)replace((PyStringObject *) self,
3193 (PyStringObject *) from,
3194 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003195}
3196
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003197/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003198
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003199PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003200"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003201\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003202Return True if S starts with the specified prefix, False otherwise.\n\
3203With optional start, test S beginning at that position.\n\
3204With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003205
3206static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003207string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003208{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003209 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003210 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003211 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003212 Py_ssize_t plen;
3213 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003214 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003215 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216
Guido van Rossumc6821402000-05-08 14:08:05 +00003217 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3218 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003219 return NULL;
3220 if (PyString_Check(subobj)) {
3221 prefix = PyString_AS_STRING(subobj);
3222 plen = PyString_GET_SIZE(subobj);
3223 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003224#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003225 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003226 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003227 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003228 subobj, start, end, -1);
3229 if (rc == -1)
3230 return NULL;
3231 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003232 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003233 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003234#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003235 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003236 return NULL;
3237
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003238 string_adjust_indices(&start, &end, len);
3239
3240 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003241 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003242
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003243 if (end-start >= plen)
3244 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3245 else
3246 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003247}
3248
3249
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003250PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003251"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003252\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003253Return True if S ends with the specified suffix, False otherwise.\n\
3254With optional start, test S beginning at that position.\n\
3255With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003256
3257static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003258string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003259{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003260 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003261 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003262 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003263 Py_ssize_t slen;
3264 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003265 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003266 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003267
Guido van Rossumc6821402000-05-08 14:08:05 +00003268 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3269 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270 return NULL;
3271 if (PyString_Check(subobj)) {
3272 suffix = PyString_AS_STRING(subobj);
3273 slen = PyString_GET_SIZE(subobj);
3274 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003275#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003276 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003277 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003278 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003279 subobj, start, end, +1);
3280 if (rc == -1)
3281 return NULL;
3282 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003283 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003284 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003285#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003286 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003287 return NULL;
3288
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003289 string_adjust_indices(&start, &end, len);
3290
3291 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003292 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003293
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003294 if (end-slen > start)
3295 start = end - slen;
3296 if (end-start >= slen)
3297 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3298 else
3299 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003300}
3301
3302
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003303PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003304"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003305\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003306Encodes S using the codec registered for encoding. encoding defaults\n\
3307to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003308handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003309a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3310'xmlcharrefreplace' as well as any other name registered with\n\
3311codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003312
3313static PyObject *
3314string_encode(PyStringObject *self, PyObject *args)
3315{
3316 char *encoding = NULL;
3317 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003318 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003319
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003320 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3321 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003322 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003323 if (v == NULL)
3324 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003325 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3326 PyErr_Format(PyExc_TypeError,
3327 "encoder did not return a string/unicode object "
3328 "(type=%.400s)",
3329 v->ob_type->tp_name);
3330 Py_DECREF(v);
3331 return NULL;
3332 }
3333 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003334
3335 onError:
3336 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003337}
3338
3339
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003340PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003341"S.decode([encoding[,errors]]) -> object\n\
3342\n\
3343Decodes S using the codec registered for encoding. encoding defaults\n\
3344to the default encoding. errors may be given to set a different error\n\
3345handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003346a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3347as well as any other name registerd with codecs.register_error that is\n\
3348able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003349
3350static PyObject *
3351string_decode(PyStringObject *self, PyObject *args)
3352{
3353 char *encoding = NULL;
3354 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003355 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003356
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003357 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3358 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003359 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003360 if (v == NULL)
3361 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003362 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3363 PyErr_Format(PyExc_TypeError,
3364 "decoder did not return a string/unicode object "
3365 "(type=%.400s)",
3366 v->ob_type->tp_name);
3367 Py_DECREF(v);
3368 return NULL;
3369 }
3370 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003371
3372 onError:
3373 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003374}
3375
3376
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003377PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003378"S.expandtabs([tabsize]) -> string\n\
3379\n\
3380Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003381If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382
3383static PyObject*
3384string_expandtabs(PyStringObject *self, PyObject *args)
3385{
3386 const char *e, *p;
3387 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003388 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003389 PyObject *u;
3390 int tabsize = 8;
3391
3392 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3393 return NULL;
3394
Thomas Wouters7e474022000-07-16 12:04:32 +00003395 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003396 i = j = 0;
3397 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3398 for (p = PyString_AS_STRING(self); p < e; p++)
3399 if (*p == '\t') {
3400 if (tabsize > 0)
3401 j += tabsize - (j % tabsize);
3402 }
3403 else {
3404 j++;
3405 if (*p == '\n' || *p == '\r') {
3406 i += j;
3407 j = 0;
3408 }
3409 }
3410
3411 /* Second pass: create output string and fill it */
3412 u = PyString_FromStringAndSize(NULL, i + j);
3413 if (!u)
3414 return NULL;
3415
3416 j = 0;
3417 q = PyString_AS_STRING(u);
3418
3419 for (p = PyString_AS_STRING(self); p < e; p++)
3420 if (*p == '\t') {
3421 if (tabsize > 0) {
3422 i = tabsize - (j % tabsize);
3423 j += i;
3424 while (i--)
3425 *q++ = ' ';
3426 }
3427 }
3428 else {
3429 j++;
3430 *q++ = *p;
3431 if (*p == '\n' || *p == '\r')
3432 j = 0;
3433 }
3434
3435 return u;
3436}
3437
Tim Peters8fa5dd02001-09-12 02:18:30 +00003438static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003439pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440{
3441 PyObject *u;
3442
3443 if (left < 0)
3444 left = 0;
3445 if (right < 0)
3446 right = 0;
3447
Tim Peters8fa5dd02001-09-12 02:18:30 +00003448 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003449 Py_INCREF(self);
3450 return (PyObject *)self;
3451 }
3452
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003453 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003454 left + PyString_GET_SIZE(self) + right);
3455 if (u) {
3456 if (left)
3457 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003458 memcpy(PyString_AS_STRING(u) + left,
3459 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003460 PyString_GET_SIZE(self));
3461 if (right)
3462 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3463 fill, right);
3464 }
3465
3466 return u;
3467}
3468
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003469PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003470"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003471"\n"
3472"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003473"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474
3475static PyObject *
3476string_ljust(PyStringObject *self, PyObject *args)
3477{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003478 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003479 char fillchar = ' ';
3480
Thomas Wouters4abb3662006-04-19 14:50:15 +00003481 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003482 return NULL;
3483
Tim Peters8fa5dd02001-09-12 02:18:30 +00003484 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003485 Py_INCREF(self);
3486 return (PyObject*) self;
3487 }
3488
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003489 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003490}
3491
3492
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003493PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003494"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003495"\n"
3496"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003497"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003498
3499static PyObject *
3500string_rjust(PyStringObject *self, PyObject *args)
3501{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003502 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003503 char fillchar = ' ';
3504
Thomas Wouters4abb3662006-04-19 14:50:15 +00003505 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003506 return NULL;
3507
Tim Peters8fa5dd02001-09-12 02:18:30 +00003508 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509 Py_INCREF(self);
3510 return (PyObject*) self;
3511 }
3512
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003513 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003514}
3515
3516
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003517PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003518"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003519"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003520"Return S centered in a string of length width. Padding is\n"
3521"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003522
3523static PyObject *
3524string_center(PyStringObject *self, PyObject *args)
3525{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003526 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003527 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003528 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529
Thomas Wouters4abb3662006-04-19 14:50:15 +00003530 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003531 return NULL;
3532
Tim Peters8fa5dd02001-09-12 02:18:30 +00003533 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534 Py_INCREF(self);
3535 return (PyObject*) self;
3536 }
3537
3538 marg = width - PyString_GET_SIZE(self);
3539 left = marg / 2 + (marg & width & 1);
3540
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003541 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542}
3543
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003544PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003545"S.zfill(width) -> string\n"
3546"\n"
3547"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003548"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003549
3550static PyObject *
3551string_zfill(PyStringObject *self, PyObject *args)
3552{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003553 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003554 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003555 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003556 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003557
Thomas Wouters4abb3662006-04-19 14:50:15 +00003558 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003559 return NULL;
3560
3561 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003562 if (PyString_CheckExact(self)) {
3563 Py_INCREF(self);
3564 return (PyObject*) self;
3565 }
3566 else
3567 return PyString_FromStringAndSize(
3568 PyString_AS_STRING(self),
3569 PyString_GET_SIZE(self)
3570 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003571 }
3572
3573 fill = width - PyString_GET_SIZE(self);
3574
3575 s = pad(self, fill, 0, '0');
3576
3577 if (s == NULL)
3578 return NULL;
3579
3580 p = PyString_AS_STRING(s);
3581 if (p[fill] == '+' || p[fill] == '-') {
3582 /* move sign to beginning of string */
3583 p[0] = p[fill];
3584 p[fill] = '0';
3585 }
3586
3587 return (PyObject*) s;
3588}
3589
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003590PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003591"S.isspace() -> bool\n\
3592\n\
3593Return True if all characters in S are whitespace\n\
3594and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003595
3596static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003597string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598{
Fred Drakeba096332000-07-09 07:04:36 +00003599 register const unsigned char *p
3600 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003601 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003602
Guido van Rossum4c08d552000-03-10 22:55:18 +00003603 /* Shortcut for single character strings */
3604 if (PyString_GET_SIZE(self) == 1 &&
3605 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003606 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003608 /* Special case for empty strings */
3609 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003611
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612 e = p + PyString_GET_SIZE(self);
3613 for (; p < e; p++) {
3614 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003615 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003616 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618}
3619
3620
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003621PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003622"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003623\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003624Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003625and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003626
3627static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003628string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003629{
Fred Drakeba096332000-07-09 07:04:36 +00003630 register const unsigned char *p
3631 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003632 register const unsigned char *e;
3633
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003634 /* Shortcut for single character strings */
3635 if (PyString_GET_SIZE(self) == 1 &&
3636 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003638
3639 /* Special case for empty strings */
3640 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003642
3643 e = p + PyString_GET_SIZE(self);
3644 for (; p < e; p++) {
3645 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003646 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003647 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003649}
3650
3651
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003652PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003654\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003655Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003656and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003657
3658static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003659string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003660{
Fred Drakeba096332000-07-09 07:04:36 +00003661 register const unsigned char *p
3662 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003663 register const unsigned char *e;
3664
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003665 /* Shortcut for single character strings */
3666 if (PyString_GET_SIZE(self) == 1 &&
3667 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003669
3670 /* Special case for empty strings */
3671 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003673
3674 e = p + PyString_GET_SIZE(self);
3675 for (; p < e; p++) {
3676 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003678 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003680}
3681
3682
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003683PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003686Return True if all characters in S are digits\n\
3687and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688
3689static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003690string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691{
Fred Drakeba096332000-07-09 07:04:36 +00003692 register const unsigned char *p
3693 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003694 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696 /* Shortcut for single character strings */
3697 if (PyString_GET_SIZE(self) == 1 &&
3698 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003701 /* Special case for empty strings */
3702 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003704
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705 e = p + PyString_GET_SIZE(self);
3706 for (; p < e; p++) {
3707 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711}
3712
3713
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003714PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003715"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003717Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003718at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719
3720static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003721string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722{
Fred Drakeba096332000-07-09 07:04:36 +00003723 register const unsigned char *p
3724 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003725 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726 int cased;
3727
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728 /* Shortcut for single character strings */
3729 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003730 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003732 /* Special case for empty strings */
3733 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003734 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003735
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736 e = p + PyString_GET_SIZE(self);
3737 cased = 0;
3738 for (; p < e; p++) {
3739 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003740 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741 else if (!cased && islower(*p))
3742 cased = 1;
3743 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003744 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745}
3746
3747
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003748PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003749"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003751Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003752at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753
3754static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003755string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756{
Fred Drakeba096332000-07-09 07:04:36 +00003757 register const unsigned char *p
3758 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003759 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 int cased;
3761
Guido van Rossum4c08d552000-03-10 22:55:18 +00003762 /* Shortcut for single character strings */
3763 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003764 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003766 /* Special case for empty strings */
3767 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003768 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003769
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770 e = p + PyString_GET_SIZE(self);
3771 cased = 0;
3772 for (; p < e; p++) {
3773 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003774 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003775 else if (!cased && isupper(*p))
3776 cased = 1;
3777 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003778 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779}
3780
3781
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003782PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003783"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003785Return True if S is a titlecased string and there is at least one\n\
3786character in S, i.e. uppercase characters may only follow uncased\n\
3787characters and lowercase characters only cased ones. Return False\n\
3788otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789
3790static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003791string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792{
Fred Drakeba096332000-07-09 07:04:36 +00003793 register const unsigned char *p
3794 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003795 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796 int cased, previous_is_cased;
3797
Guido van Rossum4c08d552000-03-10 22:55:18 +00003798 /* Shortcut for single character strings */
3799 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003800 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003801
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003802 /* Special case for empty strings */
3803 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003804 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003805
Guido van Rossum4c08d552000-03-10 22:55:18 +00003806 e = p + PyString_GET_SIZE(self);
3807 cased = 0;
3808 previous_is_cased = 0;
3809 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003810 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811
3812 if (isupper(ch)) {
3813 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003814 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003815 previous_is_cased = 1;
3816 cased = 1;
3817 }
3818 else if (islower(ch)) {
3819 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003820 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 previous_is_cased = 1;
3822 cased = 1;
3823 }
3824 else
3825 previous_is_cased = 0;
3826 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003827 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003828}
3829
3830
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003831PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003832"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003833\n\
3834Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003835Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003836is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003837
Guido van Rossum4c08d552000-03-10 22:55:18 +00003838static PyObject*
3839string_splitlines(PyStringObject *self, PyObject *args)
3840{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003841 register Py_ssize_t i;
3842 register Py_ssize_t j;
3843 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003844 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003845 PyObject *list;
3846 PyObject *str;
3847 char *data;
3848
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003849 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003850 return NULL;
3851
3852 data = PyString_AS_STRING(self);
3853 len = PyString_GET_SIZE(self);
3854
Guido van Rossum4c08d552000-03-10 22:55:18 +00003855 list = PyList_New(0);
3856 if (!list)
3857 goto onError;
3858
3859 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003860 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003861
Guido van Rossum4c08d552000-03-10 22:55:18 +00003862 /* Find a line and append it */
3863 while (i < len && data[i] != '\n' && data[i] != '\r')
3864 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003865
3866 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003867 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003868 if (i < len) {
3869 if (data[i] == '\r' && i + 1 < len &&
3870 data[i+1] == '\n')
3871 i += 2;
3872 else
3873 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003874 if (keepends)
3875 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003876 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003877 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003878 j = i;
3879 }
3880 if (j < len) {
3881 SPLIT_APPEND(data, j, len);
3882 }
3883
3884 return list;
3885
3886 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003887 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003888 return NULL;
3889}
3890
3891#undef SPLIT_APPEND
3892
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003893static PyObject *
3894string_getnewargs(PyStringObject *v)
3895{
3896 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3897}
3898
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003899
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003900static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003901string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003902 /* Counterparts of the obsolete stropmodule functions; except
3903 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003904 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3905 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003906 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003907 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3908 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003909 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3910 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3911 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3912 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3913 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3914 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3915 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003916 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3917 capitalize__doc__},
3918 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3919 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3920 endswith__doc__},
3921 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3922 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3923 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3924 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3925 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3926 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3927 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3928 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3929 startswith__doc__},
3930 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3931 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3932 swapcase__doc__},
3933 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3934 translate__doc__},
3935 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3936 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3937 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3938 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3939 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3940 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3941 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3942 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3943 expandtabs__doc__},
3944 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3945 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003946 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003947 {NULL, NULL} /* sentinel */
3948};
3949
Jeremy Hylton938ace62002-07-17 16:30:39 +00003950static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003951str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3952
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003953static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003954string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003955{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003956 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003957 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003958
Guido van Rossumae960af2001-08-30 03:11:59 +00003959 if (type != &PyString_Type)
3960 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003961 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3962 return NULL;
3963 if (x == NULL)
3964 return PyString_FromString("");
3965 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003966}
3967
Guido van Rossumae960af2001-08-30 03:11:59 +00003968static PyObject *
3969str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3970{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003971 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003972 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003973
3974 assert(PyType_IsSubtype(type, &PyString_Type));
3975 tmp = string_new(&PyString_Type, args, kwds);
3976 if (tmp == NULL)
3977 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003978 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003979 n = PyString_GET_SIZE(tmp);
3980 pnew = type->tp_alloc(type, n);
3981 if (pnew != NULL) {
3982 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003983 ((PyStringObject *)pnew)->ob_shash =
3984 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003985 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003986 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003987 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003988 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003989}
3990
Guido van Rossumcacfc072002-05-24 19:01:59 +00003991static PyObject *
3992basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3993{
3994 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003995 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003996 return NULL;
3997}
3998
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003999static PyObject *
4000string_mod(PyObject *v, PyObject *w)
4001{
4002 if (!PyString_Check(v)) {
4003 Py_INCREF(Py_NotImplemented);
4004 return Py_NotImplemented;
4005 }
4006 return PyString_Format(v, w);
4007}
4008
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004009PyDoc_STRVAR(basestring_doc,
4010"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004011
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004012static PyNumberMethods string_as_number = {
4013 0, /*nb_add*/
4014 0, /*nb_subtract*/
4015 0, /*nb_multiply*/
4016 0, /*nb_divide*/
4017 string_mod, /*nb_remainder*/
4018};
4019
4020
Guido van Rossumcacfc072002-05-24 19:01:59 +00004021PyTypeObject PyBaseString_Type = {
4022 PyObject_HEAD_INIT(&PyType_Type)
4023 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004024 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004025 0,
4026 0,
4027 0, /* tp_dealloc */
4028 0, /* tp_print */
4029 0, /* tp_getattr */
4030 0, /* tp_setattr */
4031 0, /* tp_compare */
4032 0, /* tp_repr */
4033 0, /* tp_as_number */
4034 0, /* tp_as_sequence */
4035 0, /* tp_as_mapping */
4036 0, /* tp_hash */
4037 0, /* tp_call */
4038 0, /* tp_str */
4039 0, /* tp_getattro */
4040 0, /* tp_setattro */
4041 0, /* tp_as_buffer */
4042 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4043 basestring_doc, /* tp_doc */
4044 0, /* tp_traverse */
4045 0, /* tp_clear */
4046 0, /* tp_richcompare */
4047 0, /* tp_weaklistoffset */
4048 0, /* tp_iter */
4049 0, /* tp_iternext */
4050 0, /* tp_methods */
4051 0, /* tp_members */
4052 0, /* tp_getset */
4053 &PyBaseObject_Type, /* tp_base */
4054 0, /* tp_dict */
4055 0, /* tp_descr_get */
4056 0, /* tp_descr_set */
4057 0, /* tp_dictoffset */
4058 0, /* tp_init */
4059 0, /* tp_alloc */
4060 basestring_new, /* tp_new */
4061 0, /* tp_free */
4062};
4063
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004064PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004065"str(object) -> string\n\
4066\n\
4067Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004068If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004069
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004070PyTypeObject PyString_Type = {
4071 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004072 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004073 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004074 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004075 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004076 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004077 (printfunc)string_print, /* tp_print */
4078 0, /* tp_getattr */
4079 0, /* tp_setattr */
4080 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004081 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004082 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004083 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004084 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004085 (hashfunc)string_hash, /* tp_hash */
4086 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004087 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004088 PyObject_GenericGetAttr, /* tp_getattro */
4089 0, /* tp_setattro */
4090 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004091 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004092 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004093 string_doc, /* tp_doc */
4094 0, /* tp_traverse */
4095 0, /* tp_clear */
4096 (richcmpfunc)string_richcompare, /* tp_richcompare */
4097 0, /* tp_weaklistoffset */
4098 0, /* tp_iter */
4099 0, /* tp_iternext */
4100 string_methods, /* tp_methods */
4101 0, /* tp_members */
4102 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004103 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004104 0, /* tp_dict */
4105 0, /* tp_descr_get */
4106 0, /* tp_descr_set */
4107 0, /* tp_dictoffset */
4108 0, /* tp_init */
4109 0, /* tp_alloc */
4110 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004111 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004112};
4113
4114void
Fred Drakeba096332000-07-09 07:04:36 +00004115PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004116{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004117 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004118 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004119 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004120 if (w == NULL || !PyString_Check(*pv)) {
4121 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004122 *pv = NULL;
4123 return;
4124 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004125 v = string_concat((PyStringObject *) *pv, w);
4126 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004127 *pv = v;
4128}
4129
Guido van Rossum013142a1994-08-30 08:19:36 +00004130void
Fred Drakeba096332000-07-09 07:04:36 +00004131PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004132{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004133 PyString_Concat(pv, w);
4134 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004135}
4136
4137
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004138/* The following function breaks the notion that strings are immutable:
4139 it changes the size of a string. We get away with this only if there
4140 is only one module referencing the object. You can also think of it
4141 as creating a new string object and destroying the old one, only
4142 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004143 already be known to some other part of the code...
4144 Note that if there's not enough memory to resize the string, the original
4145 string object at *pv is deallocated, *pv is set to NULL, an "out of
4146 memory" exception is set, and -1 is returned. Else (on success) 0 is
4147 returned, and the value in *pv may or may not be the same as on input.
4148 As always, an extra byte is allocated for a trailing \0 byte (newsize
4149 does *not* include that), and a trailing \0 byte is stored.
4150*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004151
4152int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004153_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004154{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004155 register PyObject *v;
4156 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004157 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004158 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4159 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004160 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004161 Py_DECREF(v);
4162 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004163 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004164 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004165 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004166 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004167 _Py_ForgetReference(v);
4168 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004169 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004170 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004171 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004172 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004173 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004174 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004175 _Py_NewReference(*pv);
4176 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004177 sv->ob_size = newsize;
4178 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004179 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004180 return 0;
4181}
Guido van Rossume5372401993-03-16 12:15:04 +00004182
4183/* Helpers for formatstring */
4184
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004185static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004186getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004187{
Thomas Wouters977485d2006-02-16 15:59:12 +00004188 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004189 if (argidx < arglen) {
4190 (*p_argidx)++;
4191 if (arglen < 0)
4192 return args;
4193 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004194 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004195 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004196 PyErr_SetString(PyExc_TypeError,
4197 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004198 return NULL;
4199}
4200
/* Format flags, one bit each so that they can be combined in a single
 * `flags` int.  The character that selects each flag in a format
 * specifier is shown on the right:
 *
 *      F_LJUST     '-'
 *      F_SIGN      '+'
 *      F_BLANK     ' '
 *      F_ALT       '#'
 *      F_ZERO      '0'
 */
#define F_LJUST     (1<<0)
#define F_SIGN      (1<<1)
#define F_BLANK     (1<<2)
#define F_ALT       (1<<3)
#define F_ZERO      (1<<4)
4213
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004214static int
Fred Drakeba096332000-07-09 07:04:36 +00004215formatfloat(char *buf, size_t buflen, int flags,
4216 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004217{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004218 /* fmt = '%#.' + `prec` + `type`
4219 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004220 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004221 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004222 x = PyFloat_AsDouble(v);
4223 if (x == -1.0 && PyErr_Occurred()) {
4224 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004225 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004226 }
Guido van Rossume5372401993-03-16 12:15:04 +00004227 if (prec < 0)
4228 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004229 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4230 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004231 /* Worst case length calc to ensure no buffer overrun:
4232
4233 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004234 fmt = %#.<prec>g
4235 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004236 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004237 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004238
4239 'f' formats:
4240 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4241 len = 1 + 50 + 1 + prec = 52 + prec
4242
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004243 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004244 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004245
4246 */
4247 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4248 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004249 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004250 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004251 return -1;
4252 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004253 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4254 (flags&F_ALT) ? "#" : "",
4255 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004256 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004257 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004258}
4259
Tim Peters38fd5b62000-09-21 05:43:11 +00004260/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4261 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4262 * Python's regular ints.
4263 * Return value: a new PyString*, or NULL if error.
4264 * . *pbuf is set to point into it,
4265 * *plen set to the # of chars following that.
4266 * Caller must decref it when done using pbuf.
4267 * The string starting at *pbuf is of the form
4268 * "-"? ("0x" | "0X")? digit+
4269 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004270 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004271 * There will be at least prec digits, zero-filled on the left if
4272 * necessary to get that many.
4273 * val object to be converted
4274 * flags bitmask of format flags; only F_ALT is looked at
4275 * prec minimum number of digits; 0-fill on left if needed
4276 * type a character in [duoxX]; u acts the same as d
4277 *
4278 * CAUTION: o, x and X conversions on regular ints can never
4279 * produce a '-' sign, but can for Python's unbounded ints.
4280 */
4281PyObject*
4282_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4283 char **pbuf, int *plen)
4284{
4285 PyObject *result = NULL;
4286 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004287 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004288 int sign; /* 1 if '-', else 0 */
4289 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004290 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004291 int numdigits; /* len == numnondigits + numdigits */
4292 int numnondigits = 0;
4293
4294 switch (type) {
4295 case 'd':
4296 case 'u':
4297 result = val->ob_type->tp_str(val);
4298 break;
4299 case 'o':
4300 result = val->ob_type->tp_as_number->nb_oct(val);
4301 break;
4302 case 'x':
4303 case 'X':
4304 numnondigits = 2;
4305 result = val->ob_type->tp_as_number->nb_hex(val);
4306 break;
4307 default:
4308 assert(!"'type' not in [duoxX]");
4309 }
4310 if (!result)
4311 return NULL;
4312
4313 /* To modify the string in-place, there can only be one reference. */
4314 if (result->ob_refcnt != 1) {
4315 PyErr_BadInternalCall();
4316 return NULL;
4317 }
4318 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004319 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004320 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004321 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4322 return NULL;
4323 }
4324 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004325 if (buf[len-1] == 'L') {
4326 --len;
4327 buf[len] = '\0';
4328 }
4329 sign = buf[0] == '-';
4330 numnondigits += sign;
4331 numdigits = len - numnondigits;
4332 assert(numdigits > 0);
4333
Tim Petersfff53252001-04-12 18:38:48 +00004334 /* Get rid of base marker unless F_ALT */
4335 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004336 /* Need to skip 0x, 0X or 0. */
4337 int skipped = 0;
4338 switch (type) {
4339 case 'o':
4340 assert(buf[sign] == '0');
4341 /* If 0 is only digit, leave it alone. */
4342 if (numdigits > 1) {
4343 skipped = 1;
4344 --numdigits;
4345 }
4346 break;
4347 case 'x':
4348 case 'X':
4349 assert(buf[sign] == '0');
4350 assert(buf[sign + 1] == 'x');
4351 skipped = 2;
4352 numnondigits -= 2;
4353 break;
4354 }
4355 if (skipped) {
4356 buf += skipped;
4357 len -= skipped;
4358 if (sign)
4359 buf[0] = '-';
4360 }
4361 assert(len == numnondigits + numdigits);
4362 assert(numdigits > 0);
4363 }
4364
4365 /* Fill with leading zeroes to meet minimum width. */
4366 if (prec > numdigits) {
4367 PyObject *r1 = PyString_FromStringAndSize(NULL,
4368 numnondigits + prec);
4369 char *b1;
4370 if (!r1) {
4371 Py_DECREF(result);
4372 return NULL;
4373 }
4374 b1 = PyString_AS_STRING(r1);
4375 for (i = 0; i < numnondigits; ++i)
4376 *b1++ = *buf++;
4377 for (i = 0; i < prec - numdigits; i++)
4378 *b1++ = '0';
4379 for (i = 0; i < numdigits; i++)
4380 *b1++ = *buf++;
4381 *b1 = '\0';
4382 Py_DECREF(result);
4383 result = r1;
4384 buf = PyString_AS_STRING(result);
4385 len = numnondigits + prec;
4386 }
4387
4388 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004389 if (type == 'X') {
4390 /* Need to convert all lower case letters to upper case.
4391 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004392 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004393 if (buf[i] >= 'a' && buf[i] <= 'x')
4394 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004395 }
4396 *pbuf = buf;
4397 *plen = len;
4398 return result;
4399}
4400
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004401static int
Fred Drakeba096332000-07-09 07:04:36 +00004402formatint(char *buf, size_t buflen, int flags,
4403 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004404{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004405 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004406 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4407 + 1 + 1 = 24 */
4408 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004409 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004410 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004411
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004412 x = PyInt_AsLong(v);
4413 if (x == -1 && PyErr_Occurred()) {
4414 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004415 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004416 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004417 if (x < 0 && type == 'u') {
4418 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004419 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004420 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4421 sign = "-";
4422 else
4423 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004424 if (prec < 0)
4425 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004426
4427 if ((flags & F_ALT) &&
4428 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004429 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004430 * of issues that cause pain:
4431 * - when 0 is being converted, the C standard leaves off
4432 * the '0x' or '0X', which is inconsistent with other
4433 * %#x/%#X conversions and inconsistent with Python's
4434 * hex() function
4435 * - there are platforms that violate the standard and
4436 * convert 0 with the '0x' or '0X'
4437 * (Metrowerks, Compaq Tru64)
4438 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004439 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004440 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004441 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004442 * We can achieve the desired consistency by inserting our
4443 * own '0x' or '0X' prefix, and substituting %x/%X in place
4444 * of %#x/%#X.
4445 *
4446 * Note that this is the same approach as used in
4447 * formatint() in unicodeobject.c
4448 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004449 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4450 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004451 }
4452 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004453 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4454 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004455 prec, type);
4456 }
4457
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004458 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4459 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004460 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004461 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004462 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004463 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004464 return -1;
4465 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004466 if (sign[0])
4467 PyOS_snprintf(buf, buflen, fmt, -x);
4468 else
4469 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004470 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004471}
4472
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004473static int
Fred Drakeba096332000-07-09 07:04:36 +00004474formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004475{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004476 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004477 if (PyString_Check(v)) {
4478 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004479 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004480 }
4481 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004482 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004483 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004484 }
4485 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004486 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004487}
4488
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004498
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004499PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004500PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004501{
4502 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004503 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004504 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004505 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004506 PyObject *result, *orig_args;
4507#ifdef Py_USING_UNICODE
4508 PyObject *v, *w;
4509#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004510 PyObject *dict = NULL;
4511 if (format == NULL || !PyString_Check(format) || args == NULL) {
4512 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004513 return NULL;
4514 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004515 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004516 fmt = PyString_AS_STRING(format);
4517 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004518 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004519 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004520 if (result == NULL)
4521 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004522 res = PyString_AsString(result);
4523 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004524 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004525 argidx = 0;
4526 }
4527 else {
4528 arglen = -1;
4529 argidx = -2;
4530 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004531 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4532 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004533 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004534 while (--fmtcnt >= 0) {
4535 if (*fmt != '%') {
4536 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004537 rescnt = fmtcnt + 100;
4538 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004539 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004540 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004541 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004542 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004543 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004544 }
4545 *res++ = *fmt++;
4546 }
4547 else {
4548 /* Got a format specifier */
4549 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004550 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004551 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004552 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004553 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004554 PyObject *v = NULL;
4555 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004556 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004557 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004558 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004559 char formatbuf[FORMATBUFLEN];
4560 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004561#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004562 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004563 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004564#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004565
Guido van Rossumda9c2711996-12-05 21:58:58 +00004566 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004567 if (*fmt == '(') {
4568 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004569 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004570 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004571 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004572
4573 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004574 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004575 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004576 goto error;
4577 }
4578 ++fmt;
4579 --fmtcnt;
4580 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004581 /* Skip over balanced parentheses */
4582 while (pcount > 0 && --fmtcnt >= 0) {
4583 if (*fmt == ')')
4584 --pcount;
4585 else if (*fmt == '(')
4586 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004587 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004588 }
4589 keylen = fmt - keystart - 1;
4590 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004591 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004592 "incomplete format key");
4593 goto error;
4594 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004595 key = PyString_FromStringAndSize(keystart,
4596 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004597 if (key == NULL)
4598 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004599 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004600 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004601 args_owned = 0;
4602 }
4603 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004604 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004605 if (args == NULL) {
4606 goto error;
4607 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004608 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004609 arglen = -1;
4610 argidx = -2;
4611 }
Guido van Rossume5372401993-03-16 12:15:04 +00004612 while (--fmtcnt >= 0) {
4613 switch (c = *fmt++) {
4614 case '-': flags |= F_LJUST; continue;
4615 case '+': flags |= F_SIGN; continue;
4616 case ' ': flags |= F_BLANK; continue;
4617 case '#': flags |= F_ALT; continue;
4618 case '0': flags |= F_ZERO; continue;
4619 }
4620 break;
4621 }
4622 if (c == '*') {
4623 v = getnextarg(args, arglen, &argidx);
4624 if (v == NULL)
4625 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004626 if (!PyInt_Check(v)) {
4627 PyErr_SetString(PyExc_TypeError,
4628 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004629 goto error;
4630 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004631 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004632 if (width < 0) {
4633 flags |= F_LJUST;
4634 width = -width;
4635 }
Guido van Rossume5372401993-03-16 12:15:04 +00004636 if (--fmtcnt >= 0)
4637 c = *fmt++;
4638 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004639 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004640 width = c - '0';
4641 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004642 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004643 if (!isdigit(c))
4644 break;
4645 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004646 PyErr_SetString(
4647 PyExc_ValueError,
4648 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004649 goto error;
4650 }
4651 width = width*10 + (c - '0');
4652 }
4653 }
4654 if (c == '.') {
4655 prec = 0;
4656 if (--fmtcnt >= 0)
4657 c = *fmt++;
4658 if (c == '*') {
4659 v = getnextarg(args, arglen, &argidx);
4660 if (v == NULL)
4661 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004662 if (!PyInt_Check(v)) {
4663 PyErr_SetString(
4664 PyExc_TypeError,
4665 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004666 goto error;
4667 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004668 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004669 if (prec < 0)
4670 prec = 0;
4671 if (--fmtcnt >= 0)
4672 c = *fmt++;
4673 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004674 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004675 prec = c - '0';
4676 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004677 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004678 if (!isdigit(c))
4679 break;
4680 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004681 PyErr_SetString(
4682 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004683 "prec too big");
4684 goto error;
4685 }
4686 prec = prec*10 + (c - '0');
4687 }
4688 }
4689 } /* prec */
4690 if (fmtcnt >= 0) {
4691 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004692 if (--fmtcnt >= 0)
4693 c = *fmt++;
4694 }
4695 }
4696 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004697 PyErr_SetString(PyExc_ValueError,
4698 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004699 goto error;
4700 }
4701 if (c != '%') {
4702 v = getnextarg(args, arglen, &argidx);
4703 if (v == NULL)
4704 goto error;
4705 }
4706 sign = 0;
4707 fill = ' ';
4708 switch (c) {
4709 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004710 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004711 len = 1;
4712 break;
4713 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004714#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004715 if (PyUnicode_Check(v)) {
4716 fmt = fmt_start;
4717 argidx = argidx_start;
4718 goto unicode;
4719 }
Georg Brandld45014b2005-10-01 17:06:00 +00004720#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004721 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004722#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004723 if (temp != NULL && PyUnicode_Check(temp)) {
4724 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004725 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004726 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004727 goto unicode;
4728 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004729#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004730 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004731 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004732 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004733 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004734 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004735 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004736 if (!PyString_Check(temp)) {
4737 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004738 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004739 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004740 goto error;
4741 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004742 pbuf = PyString_AS_STRING(temp);
4743 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004744 if (prec >= 0 && len > prec)
4745 len = prec;
4746 break;
4747 case 'i':
4748 case 'd':
4749 case 'u':
4750 case 'o':
4751 case 'x':
4752 case 'X':
4753 if (c == 'i')
4754 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004755 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004756 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004757 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004758 prec, c, &pbuf, &ilen);
4759 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004760 if (!temp)
4761 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004762 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004763 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004764 else {
4765 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004766 len = formatint(pbuf,
4767 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004768 flags, prec, c, v);
4769 if (len < 0)
4770 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004771 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004772 }
4773 if (flags & F_ZERO)
4774 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004775 break;
4776 case 'e':
4777 case 'E':
4778 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004779 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004780 case 'g':
4781 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004782 if (c == 'F')
4783 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004784 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004785 len = formatfloat(pbuf, sizeof(formatbuf),
4786 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004787 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004788 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004789 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004790 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004791 fill = '0';
4792 break;
4793 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004794#ifdef Py_USING_UNICODE
4795 if (PyUnicode_Check(v)) {
4796 fmt = fmt_start;
4797 argidx = argidx_start;
4798 goto unicode;
4799 }
4800#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004801 pbuf = formatbuf;
4802 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004803 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004804 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004805 break;
4806 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004807 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004808 "unsupported format character '%c' (0x%x) "
4809 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004810 c, c,
4811 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004812 goto error;
4813 }
4814 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004815 if (*pbuf == '-' || *pbuf == '+') {
4816 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004817 len--;
4818 }
4819 else if (flags & F_SIGN)
4820 sign = '+';
4821 else if (flags & F_BLANK)
4822 sign = ' ';
4823 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004824 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004825 }
4826 if (width < len)
4827 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004828 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004829 reslen -= rescnt;
4830 rescnt = width + fmtcnt + 100;
4831 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004832 if (reslen < 0) {
4833 Py_DECREF(result);
4834 return PyErr_NoMemory();
4835 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004836 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004837 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004838 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004839 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004840 }
4841 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004842 if (fill != ' ')
4843 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004844 rescnt--;
4845 if (width > len)
4846 width--;
4847 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004848 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4849 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004850 assert(pbuf[1] == c);
4851 if (fill != ' ') {
4852 *res++ = *pbuf++;
4853 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004854 }
Tim Petersfff53252001-04-12 18:38:48 +00004855 rescnt -= 2;
4856 width -= 2;
4857 if (width < 0)
4858 width = 0;
4859 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004860 }
4861 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004862 do {
4863 --rescnt;
4864 *res++ = fill;
4865 } while (--width > len);
4866 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004867 if (fill == ' ') {
4868 if (sign)
4869 *res++ = sign;
4870 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004871 (c == 'x' || c == 'X')) {
4872 assert(pbuf[0] == '0');
4873 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004874 *res++ = *pbuf++;
4875 *res++ = *pbuf++;
4876 }
4877 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004878 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004879 res += len;
4880 rescnt -= len;
4881 while (--width >= len) {
4882 --rescnt;
4883 *res++ = ' ';
4884 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004885 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004886 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004887 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004888 goto error;
4889 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004890 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004891 } /* '%' */
4892 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004893 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004894 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004895 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004896 goto error;
4897 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004898 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004899 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004900 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004901 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004902 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004903
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004904#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004905 unicode:
4906 if (args_owned) {
4907 Py_DECREF(args);
4908 args_owned = 0;
4909 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004910 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004911 if (PyTuple_Check(orig_args) && argidx > 0) {
4912 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004913 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004914 v = PyTuple_New(n);
4915 if (v == NULL)
4916 goto error;
4917 while (--n >= 0) {
4918 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4919 Py_INCREF(w);
4920 PyTuple_SET_ITEM(v, n, w);
4921 }
4922 args = v;
4923 } else {
4924 Py_INCREF(orig_args);
4925 args = orig_args;
4926 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004927 args_owned = 1;
4928 /* Take what we have of the result and let the Unicode formatting
4929 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004930 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004931 if (_PyString_Resize(&result, rescnt))
4932 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004933 fmtcnt = PyString_GET_SIZE(format) - \
4934 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004935 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4936 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004937 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004938 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004939 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004940 if (v == NULL)
4941 goto error;
4942 /* Paste what we have (result) to what the Unicode formatting
4943 function returned (v) and return the result (or error) */
4944 w = PyUnicode_Concat(result, v);
4945 Py_DECREF(result);
4946 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004947 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004948 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004949#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004950
Guido van Rossume5372401993-03-16 12:15:04 +00004951 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004952 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004953 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004954 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004955 }
Guido van Rossume5372401993-03-16 12:15:04 +00004956 return NULL;
4957}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004958
Guido van Rossum2a61e741997-01-18 07:55:05 +00004959void
Fred Drakeba096332000-07-09 07:04:36 +00004960PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004961{
4962 register PyStringObject *s = (PyStringObject *)(*p);
4963 PyObject *t;
4964 if (s == NULL || !PyString_Check(s))
4965 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004966 /* If it's a string subclass, we don't really know what putting
4967 it in the interned dict might do. */
4968 if (!PyString_CheckExact(s))
4969 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004970 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004971 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004972 if (interned == NULL) {
4973 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004974 if (interned == NULL) {
4975 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004976 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004977 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004978 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004979 t = PyDict_GetItem(interned, (PyObject *)s);
4980 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004981 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004982 Py_DECREF(*p);
4983 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004984 return;
4985 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004986
Armin Rigo79f7ad22004-08-07 19:27:39 +00004987 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004988 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004989 return;
4990 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004991 /* The two references in interned are not counted by refcnt.
4992 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004993 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004994 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004995}
4996
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004997void
4998PyString_InternImmortal(PyObject **p)
4999{
5000 PyString_InternInPlace(p);
5001 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5002 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5003 Py_INCREF(*p);
5004 }
5005}
5006
Guido van Rossum2a61e741997-01-18 07:55:05 +00005007
5008PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005009PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005010{
5011 PyObject *s = PyString_FromString(cp);
5012 if (s == NULL)
5013 return NULL;
5014 PyString_InternInPlace(&s);
5015 return s;
5016}
5017
Guido van Rossum8cf04761997-08-02 02:57:45 +00005018void
Fred Drakeba096332000-07-09 07:04:36 +00005019PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005020{
5021 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005022 for (i = 0; i < UCHAR_MAX + 1; i++) {
5023 Py_XDECREF(characters[i]);
5024 characters[i] = NULL;
5025 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005026 Py_XDECREF(nullstring);
5027 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005028}
Barry Warsawa903ad982001-02-23 16:40:48 +00005029
Barry Warsawa903ad982001-02-23 16:40:48 +00005030void _Py_ReleaseInternedStrings(void)
5031{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005032 PyObject *keys;
5033 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005034 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005035
5036 if (interned == NULL || !PyDict_Check(interned))
5037 return;
5038 keys = PyDict_Keys(interned);
5039 if (keys == NULL || !PyList_Check(keys)) {
5040 PyErr_Clear();
5041 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005042 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005043
5044 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5045 detector, interned strings are not forcibly deallocated; rather, we
5046 give them their stolen references back, and then clear and DECREF
5047 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005048
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005049 fprintf(stderr, "releasing interned strings\n");
5050 n = PyList_GET_SIZE(keys);
5051 for (i = 0; i < n; i++) {
5052 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5053 switch (s->ob_sstate) {
5054 case SSTATE_NOT_INTERNED:
5055 /* XXX Shouldn't happen */
5056 break;
5057 case SSTATE_INTERNED_IMMORTAL:
5058 s->ob_refcnt += 1;
5059 break;
5060 case SSTATE_INTERNED_MORTAL:
5061 s->ob_refcnt += 2;
5062 break;
5063 default:
5064 Py_FatalError("Inconsistent interned string state.");
5065 }
5066 s->ob_sstate = SSTATE_NOT_INTERNED;
5067 }
5068 Py_DECREF(keys);
5069 PyDict_Clear(interned);
5070 Py_DECREF(interned);
5071 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005072}