blob: 3901f5731a747d964585a4a87ee752a64c05f43f [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Gregory P. Smith14acde32008-04-09 23:41:13 +000056 if (size < 0) {
57 PyErr_SetString(PyExc_SystemError,
58 "Negative size passed to PyString_FromStringAndSize");
59 return NULL;
60 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 if (size == 0 && (op = nullstring) != NULL) {
62#ifdef COUNT_ALLOCS
63 null_strings++;
64#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000065 Py_INCREF(op);
66 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 if (size == 1 && str != NULL &&
69 (op = characters[*str & UCHAR_MAX]) != NULL)
70 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071#ifdef COUNT_ALLOCS
72 one_strings++;
73#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000074 Py_INCREF(op);
75 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000076 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000077
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000078 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000079 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000082 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000084 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000086 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000088 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000090 PyObject *t = (PyObject *)op;
91 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000092 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000096 PyObject *t = (PyObject *)op;
97 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000098 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000140 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000162 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000168 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
Tim Peters8931ff12006-05-13 23:28:20 +0000183 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
184 * they don't affect the amount of space we reserve.
185 */
186 if ((*f == 'l' || *f == 'z') &&
187 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000197 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000243 Py_ssize_t i;
244 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000245 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000246 /* parse the width.precision part (we're only
247 interested in the precision value, if any) */
248 n = 0;
249 while (isdigit(Py_CHARMASK(*f)))
250 n = (n*10) + *f++ - '0';
251 if (*f == '.') {
252 f++;
253 n = 0;
254 while (isdigit(Py_CHARMASK(*f)))
255 n = (n*10) + *f++ - '0';
256 }
257 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
258 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000259 /* handle the long flag, but only for %ld and %lu.
260 others can be added when necessary. */
261 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000262 longflag = 1;
263 ++f;
264 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000265 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000266 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000267 size_tflag = 1;
268 ++f;
269 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000270
Barry Warsawdadace02001-08-24 18:32:06 +0000271 switch (*f) {
272 case 'c':
273 *s++ = va_arg(vargs, int);
274 break;
275 case 'd':
276 if (longflag)
277 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000278 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000279 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
280 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000281 else
282 sprintf(s, "%d", va_arg(vargs, int));
283 s += strlen(s);
284 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000285 case 'u':
286 if (longflag)
287 sprintf(s, "%lu",
288 va_arg(vargs, unsigned long));
289 else if (size_tflag)
290 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
291 va_arg(vargs, size_t));
292 else
293 sprintf(s, "%u",
294 va_arg(vargs, unsigned int));
295 s += strlen(s);
296 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000297 case 'i':
298 sprintf(s, "%i", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 'x':
302 sprintf(s, "%x", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 's':
306 p = va_arg(vargs, char*);
307 i = strlen(p);
308 if (n > 0 && i > n)
309 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000310 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000311 s += i;
312 break;
313 case 'p':
314 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000315 /* %p is ill-defined: ensure leading 0x. */
316 if (s[1] == 'X')
317 s[1] = 'x';
318 else if (s[1] != 'x') {
319 memmove(s+2, s, strlen(s)+1);
320 s[0] = '0';
321 s[1] = 'x';
322 }
Barry Warsawdadace02001-08-24 18:32:06 +0000323 s += strlen(s);
324 break;
325 case '%':
326 *s++ = '%';
327 break;
328 default:
329 strcpy(s, p);
330 s += strlen(s);
331 goto end;
332 }
333 } else
334 *s++ = *f;
335 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336
Barry Warsawdadace02001-08-24 18:32:06 +0000337 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000339 return string;
340}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000341
Barry Warsawdadace02001-08-24 18:32:06 +0000342PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000343PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000344{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000345 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000346 va_list vargs;
347
348#ifdef HAVE_STDARG_PROTOTYPES
349 va_start(vargs, format);
350#else
351 va_start(vargs);
352#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000353 ret = PyString_FromFormatV(format, vargs);
354 va_end(vargs);
355 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000356}
357
358
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000360 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361 const char *encoding,
362 const char *errors)
363{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000364 PyObject *v, *str;
365
366 str = PyString_FromStringAndSize(s, size);
367 if (str == NULL)
368 return NULL;
369 v = PyString_AsDecodedString(str, encoding, errors);
370 Py_DECREF(str);
371 return v;
372}
373
374PyObject *PyString_AsDecodedObject(PyObject *str,
375 const char *encoding,
376 const char *errors)
377{
378 PyObject *v;
379
380 if (!PyString_Check(str)) {
381 PyErr_BadArgument();
382 goto onError;
383 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000384
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385 if (encoding == NULL) {
386#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000387 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000388#else
389 PyErr_SetString(PyExc_ValueError, "no encoding specified");
390 goto onError;
391#endif
392 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393
394 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395 v = PyCodec_Decode(str, encoding, errors);
396 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398
399 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000400
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000401 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000402 return NULL;
403}
404
405PyObject *PyString_AsDecodedString(PyObject *str,
406 const char *encoding,
407 const char *errors)
408{
409 PyObject *v;
410
411 v = PyString_AsDecodedObject(str, encoding, errors);
412 if (v == NULL)
413 goto onError;
414
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000415#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000416 /* Convert Unicode to a string using the default encoding */
417 if (PyUnicode_Check(v)) {
418 PyObject *temp = v;
419 v = PyUnicode_AsEncodedString(v, NULL, NULL);
420 Py_DECREF(temp);
421 if (v == NULL)
422 goto onError;
423 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000424#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 if (!PyString_Check(v)) {
426 PyErr_Format(PyExc_TypeError,
427 "decoder did not return a string object (type=%.400s)",
428 v->ob_type->tp_name);
429 Py_DECREF(v);
430 goto onError;
431 }
432
433 return v;
434
435 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436 return NULL;
437}
438
439PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000440 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 const char *encoding,
442 const char *errors)
443{
444 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000445
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446 str = PyString_FromStringAndSize(s, size);
447 if (str == NULL)
448 return NULL;
449 v = PyString_AsEncodedString(str, encoding, errors);
450 Py_DECREF(str);
451 return v;
452}
453
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000454PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 const char *encoding,
456 const char *errors)
457{
458 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000459
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000460 if (!PyString_Check(str)) {
461 PyErr_BadArgument();
462 goto onError;
463 }
464
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465 if (encoding == NULL) {
466#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000467 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000468#else
469 PyErr_SetString(PyExc_ValueError, "no encoding specified");
470 goto onError;
471#endif
472 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000473
474 /* Encode via the codec registry */
475 v = PyCodec_Encode(str, encoding, errors);
476 if (v == NULL)
477 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000478
479 return v;
480
481 onError:
482 return NULL;
483}
484
485PyObject *PyString_AsEncodedString(PyObject *str,
486 const char *encoding,
487 const char *errors)
488{
489 PyObject *v;
490
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000491 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000492 if (v == NULL)
493 goto onError;
494
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000495#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000496 /* Convert Unicode to a string using the default encoding */
497 if (PyUnicode_Check(v)) {
498 PyObject *temp = v;
499 v = PyUnicode_AsEncodedString(v, NULL, NULL);
500 Py_DECREF(temp);
501 if (v == NULL)
502 goto onError;
503 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000504#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 if (!PyString_Check(v)) {
506 PyErr_Format(PyExc_TypeError,
507 "encoder did not return a string object (type=%.400s)",
508 v->ob_type->tp_name);
509 Py_DECREF(v);
510 goto onError;
511 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000512
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000513 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000514
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000515 onError:
516 return NULL;
517}
518
Guido van Rossum234f9421993-06-17 12:35:49 +0000519static void
Fred Drakeba096332000-07-09 07:04:36 +0000520string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000521{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000522 switch (PyString_CHECK_INTERNED(op)) {
523 case SSTATE_NOT_INTERNED:
524 break;
525
526 case SSTATE_INTERNED_MORTAL:
527 /* revive dead object temporarily for DelItem */
528 op->ob_refcnt = 3;
529 if (PyDict_DelItem(interned, op) != 0)
530 Py_FatalError(
531 "deletion of interned string failed");
532 break;
533
534 case SSTATE_INTERNED_IMMORTAL:
535 Py_FatalError("Immortal interned string died.");
536
537 default:
538 Py_FatalError("Inconsistent interned string state.");
539 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000540 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000541}
542
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000543/* Unescape a backslash-escaped string. If unicode is non-zero,
544 the string is a u-literal. If recode_encoding is non-zero,
545 the string is UTF-8 encoded and should be re-encoded in the
546 specified encoding. */
547
548PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000549 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000550 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000551 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000552 const char *recode_encoding)
553{
554 int c;
555 char *p, *buf;
556 const char *end;
557 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000558 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000559 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000560 if (v == NULL)
561 return NULL;
562 p = buf = PyString_AsString(v);
563 end = s + len;
564 while (s < end) {
565 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000566 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000567#ifdef Py_USING_UNICODE
568 if (recode_encoding && (*s & 0x80)) {
569 PyObject *u, *w;
570 char *r;
571 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000572 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000573 t = s;
574 /* Decode non-ASCII bytes as UTF-8. */
575 while (t < end && (*t & 0x80)) t++;
576 u = PyUnicode_DecodeUTF8(s, t - s, errors);
577 if(!u) goto failed;
578
579 /* Recode them in target encoding. */
580 w = PyUnicode_AsEncodedString(
581 u, recode_encoding, errors);
582 Py_DECREF(u);
583 if (!w) goto failed;
584
585 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000586 assert(PyString_Check(w));
587 r = PyString_AS_STRING(w);
588 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000589 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000590 p += rn;
591 Py_DECREF(w);
592 s = t;
593 } else {
594 *p++ = *s++;
595 }
596#else
597 *p++ = *s++;
598#endif
599 continue;
600 }
601 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000602 if (s==end) {
603 PyErr_SetString(PyExc_ValueError,
604 "Trailing \\ in string");
605 goto failed;
606 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000607 switch (*s++) {
608 /* XXX This assumes ASCII! */
609 case '\n': break;
610 case '\\': *p++ = '\\'; break;
611 case '\'': *p++ = '\''; break;
612 case '\"': *p++ = '\"'; break;
613 case 'b': *p++ = '\b'; break;
614 case 'f': *p++ = '\014'; break; /* FF */
615 case 't': *p++ = '\t'; break;
616 case 'n': *p++ = '\n'; break;
617 case 'r': *p++ = '\r'; break;
618 case 'v': *p++ = '\013'; break; /* VT */
619 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
620 case '0': case '1': case '2': case '3':
621 case '4': case '5': case '6': case '7':
622 c = s[-1] - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000623 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000624 c = (c<<3) + *s++ - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000625 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 c = (c<<3) + *s++ - '0';
627 }
628 *p++ = c;
629 break;
630 case 'x':
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000631 if (s+1 < end &&
632 isxdigit(Py_CHARMASK(s[0])) &&
633 isxdigit(Py_CHARMASK(s[1])))
634 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 unsigned int x = 0;
636 c = Py_CHARMASK(*s);
637 s++;
638 if (isdigit(c))
639 x = c - '0';
640 else if (islower(c))
641 x = 10 + c - 'a';
642 else
643 x = 10 + c - 'A';
644 x = x << 4;
645 c = Py_CHARMASK(*s);
646 s++;
647 if (isdigit(c))
648 x += c - '0';
649 else if (islower(c))
650 x += 10 + c - 'a';
651 else
652 x += 10 + c - 'A';
653 *p++ = x;
654 break;
655 }
656 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000657 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000658 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000659 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000660 }
661 if (strcmp(errors, "replace") == 0) {
662 *p++ = '?';
663 } else if (strcmp(errors, "ignore") == 0)
664 /* do nothing */;
665 else {
666 PyErr_Format(PyExc_ValueError,
667 "decoding error; "
668 "unknown error handling code: %.400s",
669 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000670 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000671 }
672#ifndef Py_USING_UNICODE
673 case 'u':
674 case 'U':
675 case 'N':
676 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000677 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000678 "Unicode escapes not legal "
679 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000680 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000681 }
682#endif
683 default:
684 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000685 s--;
686 goto non_esc; /* an arbitry number of unescaped
687 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 }
689 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000690 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000691 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000692 return v;
693 failed:
694 Py_DECREF(v);
695 return NULL;
696}
697
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000698/* -------------------------------------------------------------------- */
699/* object api */
700
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702string_getsize(register PyObject *op)
703{
704 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000705 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706 if (PyString_AsStringAndSize(op, &s, &len))
707 return -1;
708 return len;
709}
710
711static /*const*/ char *
712string_getbuffer(register PyObject *op)
713{
714 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (PyString_AsStringAndSize(op, &s, &len))
717 return NULL;
718 return s;
719}
720
Martin v. Löwis18e16552006-02-15 17:27:45 +0000721Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000722PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 if (!PyString_Check(op))
725 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727}
728
729/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000730PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732 if (!PyString_Check(op))
733 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000734 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000735}
736
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737int
738PyString_AsStringAndSize(register PyObject *obj,
739 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000740 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741{
742 if (s == NULL) {
743 PyErr_BadInternalCall();
744 return -1;
745 }
746
747 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000748#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 if (PyUnicode_Check(obj)) {
750 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
751 if (obj == NULL)
752 return -1;
753 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000754 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000755#endif
756 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000757 PyErr_Format(PyExc_TypeError,
758 "expected string or Unicode object, "
759 "%.200s found", obj->ob_type->tp_name);
760 return -1;
761 }
762 }
763
764 *s = PyString_AS_STRING(obj);
765 if (len != NULL)
766 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000767 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000768 PyErr_SetString(PyExc_TypeError,
769 "expected string without null bytes");
770 return -1;
771 }
772 return 0;
773}
774
Fredrik Lundhaf722372006-05-25 17:55:31 +0000775/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000776/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000777
Fredrik Lundha50d2012006-05-26 17:04:58 +0000778#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000779
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000780#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000781#define STRINGLIB_LEN PyString_GET_SIZE
782#define STRINGLIB_NEW PyString_FromStringAndSize
783#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000784
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Fredrik Lundha50d2012006-05-26 17:04:58 +0000787#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000788
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000789#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000790#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000791#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000792
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000794static int
Fred Drakeba096332000-07-09 07:04:36 +0000795string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000796{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000797 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000799 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000800
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000801 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000802 if (! PyString_CheckExact(op)) {
803 int ret;
804 /* A str subclass may have its own __str__ method. */
805 op = (PyStringObject *) PyObject_Str((PyObject *)op);
806 if (op == NULL)
807 return -1;
808 ret = string_print(op, fp, flags);
809 Py_DECREF(op);
810 return ret;
811 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000812 if (flags & Py_PRINT_RAW) {
Armin Rigo4b63c212006-10-04 11:44:06 +0000813 char *data = op->ob_sval;
814 Py_ssize_t size = op->ob_size;
815 while (size > INT_MAX) {
816 /* Very long strings cannot be written atomically.
817 * But don't write exactly INT_MAX bytes at a time
818 * to avoid memory aligment issues.
819 */
820 const int chunk_size = INT_MAX & ~0x3FFF;
821 fwrite(data, 1, chunk_size, fp);
822 data += chunk_size;
823 size -= chunk_size;
824 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000825#ifdef __VMS
Armin Rigo4b63c212006-10-04 11:44:06 +0000826 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000827#else
Armin Rigo4b63c212006-10-04 11:44:06 +0000828 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000829#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000830 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832
Thomas Wouters7e474022000-07-16 12:04:32 +0000833 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000834 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000835 if (memchr(op->ob_sval, '\'', op->ob_size) &&
836 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 quote = '"';
838
839 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000840 for (i = 0; i < op->ob_size; i++) {
841 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000842 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000844 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000846 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000848 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000849 fprintf(fp, "\\r");
850 else if (c < ' ' || c >= 0x7f)
851 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000852 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000853 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000856 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857}
858
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859PyObject *
860PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000862 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000863 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000864 PyObject *v;
Armin Rigo4b63c212006-10-04 11:44:06 +0000865 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000866 PyErr_SetString(PyExc_OverflowError,
867 "string is too large to make repr");
Guido van Rossume6a6f392007-11-07 01:19:49 +0000868 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000869 }
870 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000872 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 }
874 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000875 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 register char c;
877 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000878 int quote;
879
Thomas Wouters7e474022000-07-16 12:04:32 +0000880 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000882 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000883 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000884 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000885 quote = '"';
886
Tim Peters9161c8b2001-12-03 01:55:38 +0000887 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000888 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000889 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000890 /* There's at least enough room for a hex escape
891 and a closing quote. */
892 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000894 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000896 else if (c == '\t')
897 *p++ = '\\', *p++ = 't';
898 else if (c == '\n')
899 *p++ = '\\', *p++ = 'n';
900 else if (c == '\r')
901 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000902 else if (c < ' ' || c >= 0x7f) {
903 /* For performance, we don't want to call
904 PyOS_snprintf here (extra layers of
905 function call). */
906 sprintf(p, "\\x%02x", c & 0xff);
907 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000908 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000909 else
910 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000912 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000913 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000916 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000917 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000918 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000919}
920
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000922string_repr(PyObject *op)
923{
924 return PyString_Repr(op, 1);
925}
926
927static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000928string_str(PyObject *s)
929{
Tim Petersc9933152001-10-16 20:18:24 +0000930 assert(PyString_Check(s));
931 if (PyString_CheckExact(s)) {
932 Py_INCREF(s);
933 return s;
934 }
935 else {
936 /* Subtype -- return genuine string with the same value. */
937 PyStringObject *t = (PyStringObject *) s;
938 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
939 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000940}
941
Martin v. Löwis18e16552006-02-15 17:27:45 +0000942static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000943string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000944{
945 return a->ob_size;
946}
947
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000948static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000949string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950{
Andrew Dalke598710c2006-05-25 18:18:39 +0000951 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 register PyStringObject *op;
953 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000954#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000955 if (PyUnicode_Check(bb))
956 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000957#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000958 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000959 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000960 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000961 return NULL;
962 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000963#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000965 if ((a->ob_size == 0 || b->ob_size == 0) &&
966 PyString_CheckExact(a) && PyString_CheckExact(b)) {
967 if (a->ob_size == 0) {
968 Py_INCREF(bb);
969 return bb;
970 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971 Py_INCREF(a);
972 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973 }
974 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000975 if (size < 0) {
976 PyErr_SetString(PyExc_OverflowError,
977 "strings are too large to concat");
978 return NULL;
979 }
980
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000981 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000982 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000983 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000985 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000986 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000987 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000988 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
989 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000990 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992#undef b
993}
994
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000996string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000997{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000998 register Py_ssize_t i;
999 register Py_ssize_t j;
1000 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001001 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001002 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 if (n < 0)
1004 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001005 /* watch out for overflows: the size can overflow int,
1006 * and the # of bytes needed can overflow size_t
1007 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001008 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001009 if (n && size / n != a->ob_size) {
1010 PyErr_SetString(PyExc_OverflowError,
1011 "repeated string is too long");
1012 return NULL;
1013 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001014 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001015 Py_INCREF(a);
1016 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001017 }
Tim Peterse7c05322004-06-27 17:24:49 +00001018 nbytes = (size_t)size;
1019 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001020 PyErr_SetString(PyExc_OverflowError,
1021 "repeated string is too long");
1022 return NULL;
1023 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001025 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001026 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001028 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001029 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001030 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001031 op->ob_sval[size] = '\0';
1032 if (a->ob_size == 1 && n > 0) {
1033 memset(op->ob_sval, a->ob_sval[0] , n);
1034 return (PyObject *) op;
1035 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001036 i = 0;
1037 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001038 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001039 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 }
1041 while (i < size) {
1042 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001043 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 i += j;
1045 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047}
1048
1049/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1050
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001051static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001052string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001053 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001054 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001055{
1056 if (i < 0)
1057 i = 0;
1058 if (j < 0)
1059 j = 0; /* Avoid signed/unsigned bug in next line */
1060 if (j > a->ob_size)
1061 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001062 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1063 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001064 Py_INCREF(a);
1065 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001066 }
1067 if (j < i)
1068 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001069 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070}
1071
Guido van Rossum9284a572000-03-07 15:53:43 +00001072static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001074{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001075 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001076#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001077 if (PyUnicode_Check(sub_obj))
1078 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001079#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001080 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001081 PyErr_SetString(PyExc_TypeError,
1082 "'in <string>' requires string as left operand");
1083 return -1;
1084 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001085 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001086
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001087 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001088}
1089
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001091string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001093 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001094 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001095 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001096 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097 return NULL;
1098 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001099 pchar = a->ob_sval[i];
1100 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001101 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001102 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001103 else {
1104#ifdef COUNT_ALLOCS
1105 one_strings++;
1106#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001107 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001108 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001109 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110}
1111
Martin v. Löwiscd353062001-05-24 16:56:35 +00001112static PyObject*
1113string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001114{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001115 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001116 Py_ssize_t len_a, len_b;
1117 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001118 PyObject *result;
1119
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001120 /* Make sure both arguments are strings. */
1121 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001122 result = Py_NotImplemented;
1123 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001124 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001125 if (a == b) {
1126 switch (op) {
1127 case Py_EQ:case Py_LE:case Py_GE:
1128 result = Py_True;
1129 goto out;
1130 case Py_NE:case Py_LT:case Py_GT:
1131 result = Py_False;
1132 goto out;
1133 }
1134 }
1135 if (op == Py_EQ) {
1136 /* Supporting Py_NE here as well does not save
1137 much time, since Py_NE is rarely used. */
1138 if (a->ob_size == b->ob_size
1139 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001140 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001141 a->ob_size) == 0)) {
1142 result = Py_True;
1143 } else {
1144 result = Py_False;
1145 }
1146 goto out;
1147 }
1148 len_a = a->ob_size; len_b = b->ob_size;
1149 min_len = (len_a < len_b) ? len_a : len_b;
1150 if (min_len > 0) {
1151 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1152 if (c==0)
1153 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1154 }else
1155 c = 0;
1156 if (c == 0)
1157 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1158 switch (op) {
1159 case Py_LT: c = c < 0; break;
1160 case Py_LE: c = c <= 0; break;
1161 case Py_EQ: assert(0); break; /* unreachable */
1162 case Py_NE: c = c != 0; break;
1163 case Py_GT: c = c > 0; break;
1164 case Py_GE: c = c >= 0; break;
1165 default:
1166 result = Py_NotImplemented;
1167 goto out;
1168 }
1169 result = c ? Py_True : Py_False;
1170 out:
1171 Py_INCREF(result);
1172 return result;
1173}
1174
1175int
1176_PyString_Eq(PyObject *o1, PyObject *o2)
1177{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001178 PyStringObject *a = (PyStringObject*) o1;
1179 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001180 return a->ob_size == b->ob_size
1181 && *a->ob_sval == *b->ob_sval
1182 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001183}
1184
Guido van Rossum9bfef441993-03-29 10:43:31 +00001185static long
Fred Drakeba096332000-07-09 07:04:36 +00001186string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001187{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001188 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001189 register unsigned char *p;
1190 register long x;
1191
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001192 if (a->ob_shash != -1)
1193 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001194 len = a->ob_size;
1195 p = (unsigned char *) a->ob_sval;
1196 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001197 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001198 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001199 x ^= a->ob_size;
1200 if (x == -1)
1201 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001202 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001203 return x;
1204}
1205
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206static PyObject*
1207string_subscript(PyStringObject* self, PyObject* item)
1208{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001209 if (PyIndex_Check(item)) {
1210 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001211 if (i == -1 && PyErr_Occurred())
1212 return NULL;
1213 if (i < 0)
1214 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001215 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001216 }
1217 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001218 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 char* source_buf;
1220 char* result_buf;
1221 PyObject* result;
1222
Tim Petersae1d0c92006-03-17 03:29:34 +00001223 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224 PyString_GET_SIZE(self),
1225 &start, &stop, &step, &slicelength) < 0) {
1226 return NULL;
1227 }
1228
1229 if (slicelength <= 0) {
1230 return PyString_FromStringAndSize("", 0);
1231 }
1232 else {
1233 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001234 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001235 if (result_buf == NULL)
1236 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001237
Tim Petersae1d0c92006-03-17 03:29:34 +00001238 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001239 cur += step, i++) {
1240 result_buf[i] = source_buf[cur];
1241 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001242
1243 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 slicelength);
1245 PyMem_Free(result_buf);
1246 return result;
1247 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001248 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001249 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001250 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001251 "string indices must be integers");
1252 return NULL;
1253 }
1254}
1255
Martin v. Löwis18e16552006-02-15 17:27:45 +00001256static Py_ssize_t
1257string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258{
1259 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001260 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001261 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001262 return -1;
1263 }
1264 *ptr = (void *)self->ob_sval;
1265 return self->ob_size;
1266}
1267
Martin v. Löwis18e16552006-02-15 17:27:45 +00001268static Py_ssize_t
1269string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001270{
Guido van Rossum045e6881997-09-08 18:30:11 +00001271 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001272 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001273 return -1;
1274}
1275
Martin v. Löwis18e16552006-02-15 17:27:45 +00001276static Py_ssize_t
1277string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278{
1279 if ( lenp )
1280 *lenp = self->ob_size;
1281 return 1;
1282}
1283
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284static Py_ssize_t
1285string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001286{
1287 if ( index != 0 ) {
1288 PyErr_SetString(PyExc_SystemError,
1289 "accessing non-existent string segment");
1290 return -1;
1291 }
1292 *ptr = self->ob_sval;
1293 return self->ob_size;
1294}
1295
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001296static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001297 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001298 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001299 (ssizeargfunc)string_repeat, /*sq_repeat*/
1300 (ssizeargfunc)string_item, /*sq_item*/
1301 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001302 0, /*sq_ass_item*/
1303 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001304 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001305};
1306
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001307static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001308 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001309 (binaryfunc)string_subscript,
1310 0,
1311};
1312
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001313static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001314 (readbufferproc)string_buffer_getreadbuf,
1315 (writebufferproc)string_buffer_getwritebuf,
1316 (segcountproc)string_buffer_getsegcount,
1317 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001318};
1319
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320
1321
/* Strip modes; the values double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001330
Andrew Dalke525eab32006-05-26 14:00:45 +00001331
/* Nonzero when `length` bytes of `pattern` match `target` starting at
   `offset`.  The first and last bytes are compared before the interior
   is handed to memcmp.

   Don't call if length < 2 (the interior length would be negative).

   Every parameter is parenthesized so that expression arguments expand
   with the intended precedence.  Arguments are still evaluated more than
   once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1337
1338
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so that an expression
   argument expands with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1351
/* Helper macros for the split functions.

   SPLIT_APPEND and SPLIT_ADD rely on the expanding function providing
   locals named `str` and `list` and an `onError` label; SPLIT_ADD and
   FIX_PREALLOC_SIZE additionally use a local `count`. */

/* Append data[left:right] to `list` as a new string.  Expands to several
   statements, so it must not be the sole body of an unbraced if/else. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as element number `count` of `list`: directly
   into a slot while count is below MAX_PREALLOC, by appending after
   that.  Increments count. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count

/* Advance (SKIP_*) or retreat (RSKIP_*) index `i` over a run of
   whitespace / non-whitespace bytes of `s`.  Parameters are parenthesized
   so that expression arguments expand with the intended precedence; `i`
   must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1388
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001389Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001390split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391{
Andrew Dalke525eab32006-05-26 14:00:45 +00001392 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001393 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001394 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001395
1396 if (list == NULL)
1397 return NULL;
1398
Andrew Dalke02758d62006-05-26 15:21:01 +00001399 i = j = 0;
1400
1401 while (maxsplit-- > 0) {
1402 SKIP_SPACE(s, i, len);
1403 if (i==len) break;
1404 j = i; i++;
1405 SKIP_NONSPACE(s, i, len);
1406 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001408
1409 if (i < len) {
1410 /* Only occurs when maxsplit was reached */
1411 /* Skip any remaining whitespace and copy to end of string */
1412 SKIP_SPACE(s, i, len);
1413 if (i != len)
1414 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001415 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001416 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001418 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 Py_DECREF(list);
1420 return NULL;
1421}
1422
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001423Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001424split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425{
Andrew Dalke525eab32006-05-26 14:00:45 +00001426 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001427 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001428 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001429
1430 if (list == NULL)
1431 return NULL;
1432
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001433 i = j = 0;
1434 while ((j < len) && (maxcount-- > 0)) {
1435 for(; j<len; j++) {
1436 /* I found that using memchr makes no difference */
1437 if (s[j] == ch) {
1438 SPLIT_ADD(s, i, j);
1439 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001441 }
1442 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001444 if (i <= len) {
1445 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001446 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001447 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001448 return list;
1449
1450 onError:
1451 Py_DECREF(list);
1452 return NULL;
1453}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001455PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456"S.split([sep [,maxsplit]]) -> list of strings\n\
1457\n\
1458Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001459delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001460splits are done. If sep is not specified or is None, any\n\
1461whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462
1463static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001464string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001466 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001467 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001468 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001469 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001470#ifdef USE_FAST
1471 Py_ssize_t pos;
1472#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473
Martin v. Löwis9c830762006-04-13 08:37:17 +00001474 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001476 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001477 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001478 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001480 if (PyString_Check(subobj)) {
1481 sub = PyString_AS_STRING(subobj);
1482 n = PyString_GET_SIZE(subobj);
1483 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001484#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001485 else if (PyUnicode_Check(subobj))
1486 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001487#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001488 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1489 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001490
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 if (n == 0) {
1492 PyErr_SetString(PyExc_ValueError, "empty separator");
1493 return NULL;
1494 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001495 else if (n == 1)
1496 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497
Andrew Dalke525eab32006-05-26 14:00:45 +00001498 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 if (list == NULL)
1500 return NULL;
1501
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001502#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001504 while (maxsplit-- > 0) {
1505 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1506 if (pos < 0)
1507 break;
1508 j = i+pos;
1509 SPLIT_ADD(s, i, j);
1510 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001512#else
1513 i = j = 0;
1514 while ((j+n <= len) && (maxsplit-- > 0)) {
1515 for (; j+n <= len; j++) {
1516 if (Py_STRING_MATCH(s, j, sub, n)) {
1517 SPLIT_ADD(s, i, j);
1518 i = j = j + n;
1519 break;
1520 }
1521 }
1522 }
1523#endif
1524 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001525 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526 return list;
1527
Andrew Dalke525eab32006-05-26 14:00:45 +00001528 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529 Py_DECREF(list);
1530 return NULL;
1531}
1532
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001533PyDoc_STRVAR(partition__doc__,
1534"S.partition(sep) -> (head, sep, tail)\n\
1535\n\
1536Searches for the separator sep in S, and returns the part before it,\n\
1537the separator itself, and the part after it. If the separator is not\n\
1538found, returns S and two empty strings.");
1539
1540static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001541string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001542{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001543 const char *sep;
1544 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001545
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001546 if (PyString_Check(sep_obj)) {
1547 sep = PyString_AS_STRING(sep_obj);
1548 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001550#ifdef Py_USING_UNICODE
1551 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001552 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001553#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001554 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001555 return NULL;
1556
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001557 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001558 (PyObject*) self,
1559 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1560 sep_obj, sep, sep_len
1561 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001562}
1563
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001564PyDoc_STRVAR(rpartition__doc__,
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001565"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001566\n\
1567Searches for the separator sep in S, starting at the end of S, and returns\n\
1568the part before it, the separator itself, and the part after it. If the\n\
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001569separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001570
1571static PyObject *
1572string_rpartition(PyStringObject *self, PyObject *sep_obj)
1573{
1574 const char *sep;
1575 Py_ssize_t sep_len;
1576
1577 if (PyString_Check(sep_obj)) {
1578 sep = PyString_AS_STRING(sep_obj);
1579 sep_len = PyString_GET_SIZE(sep_obj);
1580 }
1581#ifdef Py_USING_UNICODE
1582 else if (PyUnicode_Check(sep_obj))
1583 return PyUnicode_Partition((PyObject *) self, sep_obj);
1584#endif
1585 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1586 return NULL;
1587
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001588 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001589 (PyObject*) self,
1590 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1591 sep_obj, sep, sep_len
1592 );
1593}
1594
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001595Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001596rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597{
Andrew Dalke525eab32006-05-26 14:00:45 +00001598 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001599 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001600 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001601
1602 if (list == NULL)
1603 return NULL;
1604
Andrew Dalke02758d62006-05-26 15:21:01 +00001605 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001606
Andrew Dalke02758d62006-05-26 15:21:01 +00001607 while (maxsplit-- > 0) {
1608 RSKIP_SPACE(s, i);
1609 if (i<0) break;
1610 j = i; i--;
1611 RSKIP_NONSPACE(s, i);
1612 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001613 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001614 if (i >= 0) {
1615 /* Only occurs when maxsplit was reached */
1616 /* Skip any remaining whitespace and copy to beginning of string */
1617 RSKIP_SPACE(s, i);
1618 if (i >= 0)
1619 SPLIT_ADD(s, 0, i + 1);
1620
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001622 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001623 if (PyList_Reverse(list) < 0)
1624 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001625 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001626 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001627 Py_DECREF(list);
1628 return NULL;
1629}
1630
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001631Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001632rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633{
Andrew Dalke525eab32006-05-26 14:00:45 +00001634 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001635 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001636 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001637
1638 if (list == NULL)
1639 return NULL;
1640
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001641 i = j = len - 1;
1642 while ((i >= 0) && (maxcount-- > 0)) {
1643 for (; i >= 0; i--) {
1644 if (s[i] == ch) {
1645 SPLIT_ADD(s, i + 1, j + 1);
1646 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001647 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001648 }
1649 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650 }
1651 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001652 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001654 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001655 if (PyList_Reverse(list) < 0)
1656 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001657 return list;
1658
1659 onError:
1660 Py_DECREF(list);
1661 return NULL;
1662}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001663
1664PyDoc_STRVAR(rsplit__doc__,
1665"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1666\n\
1667Return a list of the words in the string S, using sep as the\n\
1668delimiter string, starting at the end of the string and working\n\
1669to the front. If maxsplit is given, at most maxsplit splits are\n\
1670done. If sep is not specified or is None, any whitespace string\n\
1671is a separator.");
1672
1673static PyObject *
1674string_rsplit(PyStringObject *self, PyObject *args)
1675{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001676 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001677 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001679 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001680
Martin v. Löwis9c830762006-04-13 08:37:17 +00001681 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001682 return NULL;
1683 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001684 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001685 if (subobj == Py_None)
1686 return rsplit_whitespace(s, len, maxsplit);
1687 if (PyString_Check(subobj)) {
1688 sub = PyString_AS_STRING(subobj);
1689 n = PyString_GET_SIZE(subobj);
1690 }
1691#ifdef Py_USING_UNICODE
1692 else if (PyUnicode_Check(subobj))
1693 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1694#endif
1695 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1696 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001697
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698 if (n == 0) {
1699 PyErr_SetString(PyExc_ValueError, "empty separator");
1700 return NULL;
1701 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001702 else if (n == 1)
1703 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001704
Andrew Dalke525eab32006-05-26 14:00:45 +00001705 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001706 if (list == NULL)
1707 return NULL;
1708
1709 j = len;
1710 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001711
1712 while ( (i >= 0) && (maxsplit-- > 0) ) {
1713 for (; i>=0; i--) {
1714 if (Py_STRING_MATCH(s, i, sub, n)) {
1715 SPLIT_ADD(s, i + n, j);
1716 j = i;
1717 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001718 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001719 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001721 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001722 SPLIT_ADD(s, 0, j);
1723 FIX_PREALLOC_SIZE(list);
1724 if (PyList_Reverse(list) < 0)
1725 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001726 return list;
1727
Andrew Dalke525eab32006-05-26 14:00:45 +00001728onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001729 Py_DECREF(list);
1730 return NULL;
1731}
1732
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001734PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735"S.join(sequence) -> string\n\
1736\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001738sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739
1740static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001741string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742{
1743 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001744 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001747 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001749 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001750 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 seq = PySequence_Fast(orig, "");
1753 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001754 return NULL;
1755 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001756
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001757 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001758 if (seqlen == 0) {
1759 Py_DECREF(seq);
1760 return PyString_FromString("");
1761 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001763 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001764 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1765 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001766 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001768 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001770
Raymond Hettinger674f2412004-08-23 23:23:54 +00001771 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001772 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001773 * Do a pre-pass to figure out the total amount of space we'll
1774 * need (sz), see whether any argument is absurd, and defer to
1775 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001776 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001777 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001778 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001779 item = PySequence_Fast_GET_ITEM(seq, i);
1780 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001781#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001782 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001783 /* Defer to Unicode join.
1784 * CAUTION: There's no gurantee that the
1785 * original sequence can be iterated over
1786 * again, so we must pass seq here.
1787 */
1788 PyObject *result;
1789 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001790 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001791 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001792 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001793#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001794 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001795 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001796 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001797 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001798 Py_DECREF(seq);
1799 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001800 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 sz += PyString_GET_SIZE(item);
1802 if (i != 0)
1803 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001804 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001805 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001806 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001807 Py_DECREF(seq);
1808 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 }
1811
1812 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001813 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001814 if (res == NULL) {
1815 Py_DECREF(seq);
1816 return NULL;
1817 }
1818
1819 /* Catenate everything. */
1820 p = PyString_AS_STRING(res);
1821 for (i = 0; i < seqlen; ++i) {
1822 size_t n;
1823 item = PySequence_Fast_GET_ITEM(seq, i);
1824 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001825 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001826 p += n;
1827 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001828 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001829 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001830 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001832
Jeremy Hylton49048292000-07-11 03:28:17 +00001833 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835}
1836
Tim Peters52e155e2001-06-16 05:42:57 +00001837PyObject *
1838_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001839{
Tim Petersa7259592001-06-16 05:11:17 +00001840 assert(sep != NULL && PyString_Check(sep));
1841 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001842 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001843}
1844
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001845Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001846string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001847{
1848 if (*end > len)
1849 *end = len;
1850 else if (*end < 0)
1851 *end += len;
1852 if (*end < 0)
1853 *end = 0;
1854 if (*start < 0)
1855 *start += len;
1856 if (*start < 0)
1857 *start = 0;
1858}
1859
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001860Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001861string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001863 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001864 const char *sub;
1865 Py_ssize_t sub_len;
1866 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001868 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1869 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 return -2;
1871 if (PyString_Check(subobj)) {
1872 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001873 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001875#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001876 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001877 return PyUnicode_Find(
1878 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001879#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001880 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001881 /* XXX - the "expected a character buffer object" is pretty
1882 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883 return -2;
1884
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001885 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001886 return stringlib_find_slice(
1887 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1888 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001889 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001890 return stringlib_rfind_slice(
1891 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1892 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893}
1894
1895
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001896PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897"S.find(sub [,start [,end]]) -> int\n\
1898\n\
1899Return the lowest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001900such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901arguments start and end are interpreted as in slice notation.\n\
1902\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001903Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904
1905static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001906string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001908 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 if (result == -2)
1910 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001911 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912}
1913
1914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001915PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916"S.index(sub [,start [,end]]) -> int\n\
1917\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001918Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919
1920static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001921string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001923 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 if (result == -2)
1925 return NULL;
1926 if (result == -1) {
1927 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001928 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 return NULL;
1930 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001931 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932}
1933
1934
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001935PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936"S.rfind(sub [,start [,end]]) -> int\n\
1937\n\
1938Return the highest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001939such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940arguments start and end are interpreted as in slice notation.\n\
1941\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001942Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943
1944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001945string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001947 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948 if (result == -2)
1949 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001950 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951}
1952
1953
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001954PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955"S.rindex(sub [,start [,end]]) -> int\n\
1956\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001957Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
1959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001960string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001962 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963 if (result == -2)
1964 return NULL;
1965 if (result == -1) {
1966 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001967 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968 return NULL;
1969 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001970 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971}
1972
1973
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001974Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1976{
1977 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001978 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001979 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001980 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1981 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001982
1983 i = 0;
1984 if (striptype != RIGHTSTRIP) {
1985 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1986 i++;
1987 }
1988 }
1989
1990 j = len;
1991 if (striptype != LEFTSTRIP) {
1992 do {
1993 j--;
1994 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1995 j++;
1996 }
1997
1998 if (i == 0 && j == len && PyString_CheckExact(self)) {
1999 Py_INCREF(self);
2000 return (PyObject*)self;
2001 }
2002 else
2003 return PyString_FromStringAndSize(s+i, j-i);
2004}
2005
2006
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002007Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002008do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009{
2010 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013 i = 0;
2014 if (striptype != RIGHTSTRIP) {
2015 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2016 i++;
2017 }
2018 }
2019
2020 j = len;
2021 if (striptype != LEFTSTRIP) {
2022 do {
2023 j--;
2024 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2025 j++;
2026 }
2027
Tim Peters8fa5dd02001-09-12 02:18:30 +00002028 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 Py_INCREF(self);
2030 return (PyObject*)self;
2031 }
2032 else
2033 return PyString_FromStringAndSize(s+i, j-i);
2034}
2035
2036
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002037Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002038do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2039{
2040 PyObject *sep = NULL;
2041
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002042 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002043 return NULL;
2044
2045 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002046 if (PyString_Check(sep))
2047 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002048#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002049 else if (PyUnicode_Check(sep)) {
2050 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2051 PyObject *res;
2052 if (uniself==NULL)
2053 return NULL;
2054 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2055 striptype, sep);
2056 Py_DECREF(uniself);
2057 return res;
2058 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002060 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002061#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002062 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002063#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002064 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002065#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002066 STRIPNAME(striptype));
2067 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002068 }
2069
2070 return do_strip(self, striptype);
2071}
2072
2073
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002074PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002075"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076\n\
2077Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002078whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002079If chars is given and not None, remove characters in chars instead.\n\
2080If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081
2082static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002083string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002085 if (PyTuple_GET_SIZE(args) == 0)
2086 return do_strip(self, BOTHSTRIP); /* Common case */
2087 else
2088 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089}
2090
2091
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002092PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002093"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002095Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002096If chars is given and not None, remove characters in chars instead.\n\
2097If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002098
2099static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002100string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002102 if (PyTuple_GET_SIZE(args) == 0)
2103 return do_strip(self, LEFTSTRIP); /* Common case */
2104 else
2105 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106}
2107
2108
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002109PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002110"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002112Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002113If chars is given and not None, remove characters in chars instead.\n\
2114If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115
2116static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002117string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002119 if (PyTuple_GET_SIZE(args) == 0)
2120 return do_strip(self, RIGHTSTRIP); /* Common case */
2121 else
2122 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123}
2124
2125
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002126PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127"S.lower() -> string\n\
2128\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002129Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002131/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2132#ifndef _tolower
2133#define _tolower tolower
2134#endif
2135
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002137string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002139 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002140 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002141 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002143 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002144 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002146
2147 s = PyString_AS_STRING(newobj);
2148
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002149 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002150
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002152 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002153 if (isupper(c))
2154 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002156
Anthony Baxtera6286212006-04-11 07:42:36 +00002157 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158}
2159
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002160PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161"S.upper() -> string\n\
2162\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002163Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002165#ifndef _toupper
2166#define _toupper toupper
2167#endif
2168
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002170string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002172 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002173 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002174 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002176 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002177 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002179
2180 s = PyString_AS_STRING(newobj);
2181
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002182 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002183
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002185 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002186 if (islower(c))
2187 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002189
Anthony Baxtera6286212006-04-11 07:42:36 +00002190 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191}
2192
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002193PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194"S.title() -> string\n\
2195\n\
2196Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002197characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002198
2199static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002200string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201{
2202 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002203 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002205 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206
Anthony Baxtera6286212006-04-11 07:42:36 +00002207 newobj = PyString_FromStringAndSize(NULL, n);
2208 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002209 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002210 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211 for (i = 0; i < n; i++) {
2212 int c = Py_CHARMASK(*s++);
2213 if (islower(c)) {
2214 if (!previous_is_cased)
2215 c = toupper(c);
2216 previous_is_cased = 1;
2217 } else if (isupper(c)) {
2218 if (previous_is_cased)
2219 c = tolower(c);
2220 previous_is_cased = 1;
2221 } else
2222 previous_is_cased = 0;
2223 *s_new++ = c;
2224 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002225 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226}
2227
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229"S.capitalize() -> string\n\
2230\n\
2231Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002232capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233
2234static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002235string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236{
2237 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002238 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002239 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240
Anthony Baxtera6286212006-04-11 07:42:36 +00002241 newobj = PyString_FromStringAndSize(NULL, n);
2242 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002244 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245 if (0 < n) {
2246 int c = Py_CHARMASK(*s++);
2247 if (islower(c))
2248 *s_new = toupper(c);
2249 else
2250 *s_new = c;
2251 s_new++;
2252 }
2253 for (i = 1; i < n; i++) {
2254 int c = Py_CHARMASK(*s++);
2255 if (isupper(c))
2256 *s_new = tolower(c);
2257 else
2258 *s_new = c;
2259 s_new++;
2260 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002261 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262}
2263
2264
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002265PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002266"S.count(sub[, start[, end]]) -> int\n\
2267\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002268Return the number of non-overlapping occurrences of substring sub in\n\
2269string S[start:end]. Optional arguments start and end are interpreted\n\
2270as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271
2272static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002273string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002275 PyObject *sub_obj;
2276 const char *str = PyString_AS_STRING(self), *sub;
2277 Py_ssize_t sub_len;
2278 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002280 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2281 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002282 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002283
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002284 if (PyString_Check(sub_obj)) {
2285 sub = PyString_AS_STRING(sub_obj);
2286 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002288#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002289 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002290 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002291 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002292 if (count == -1)
2293 return NULL;
2294 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002295 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002296 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002297#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002298 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 return NULL;
2300
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002301 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002302
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002303 return PyInt_FromSsize_t(
2304 stringlib_count(str + start, end - start, sub, sub_len)
2305 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306}
2307
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002308PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309"S.swapcase() -> string\n\
2310\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002312converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313
2314static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002315string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316{
2317 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002318 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002319 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320
Anthony Baxtera6286212006-04-11 07:42:36 +00002321 newobj = PyString_FromStringAndSize(NULL, n);
2322 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002324 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325 for (i = 0; i < n; i++) {
2326 int c = Py_CHARMASK(*s++);
2327 if (islower(c)) {
2328 *s_new = toupper(c);
2329 }
2330 else if (isupper(c)) {
2331 *s_new = tolower(c);
2332 }
2333 else
2334 *s_new = c;
2335 s_new++;
2336 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002337 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338}
2339
2340
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002341PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342"S.translate(table [,deletechars]) -> string\n\
2343\n\
2344Return a copy of the string S, where all characters occurring\n\
2345in the optional argument deletechars are removed, and the\n\
2346remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002347translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002348
2349static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002350string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 register char *input, *output;
2353 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002354 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002357 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358 PyObject *result;
2359 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002362 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002363 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365
2366 if (PyString_Check(tableobj)) {
2367 table1 = PyString_AS_STRING(tableobj);
2368 tablen = PyString_GET_SIZE(tableobj);
2369 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002370#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002372 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 parameter; instead a mapping to None will cause characters
2374 to be deleted. */
2375 if (delobj != NULL) {
2376 PyErr_SetString(PyExc_TypeError,
2377 "deletions are implemented differently for unicode");
2378 return NULL;
2379 }
2380 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2381 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002382#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002384 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385
Martin v. Löwis00b61272002-12-12 20:03:19 +00002386 if (tablen != 256) {
2387 PyErr_SetString(PyExc_ValueError,
2388 "translation table must be 256 characters long");
2389 return NULL;
2390 }
2391
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392 if (delobj != NULL) {
2393 if (PyString_Check(delobj)) {
2394 del_table = PyString_AS_STRING(delobj);
2395 dellen = PyString_GET_SIZE(delobj);
2396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002397#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 else if (PyUnicode_Check(delobj)) {
2399 PyErr_SetString(PyExc_TypeError,
2400 "deletions are implemented differently for unicode");
2401 return NULL;
2402 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002403#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2405 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002406 }
2407 else {
2408 del_table = NULL;
2409 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 }
2411
2412 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002413 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414 result = PyString_FromStringAndSize((char *)NULL, inlen);
2415 if (result == NULL)
2416 return NULL;
2417 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002418 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419
2420 if (dellen == 0) {
2421 /* If no deletions are required, use faster code */
2422 for (i = inlen; --i >= 0; ) {
2423 c = Py_CHARMASK(*input++);
2424 if (Py_CHARMASK((*output++ = table[c])) != c)
2425 changed = 1;
2426 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002427 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 return result;
2429 Py_DECREF(result);
2430 Py_INCREF(input_obj);
2431 return input_obj;
2432 }
2433
2434 for (i = 0; i < 256; i++)
2435 trans_table[i] = Py_CHARMASK(table[i]);
2436
2437 for (i = 0; i < dellen; i++)
2438 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2439
2440 for (i = inlen; --i >= 0; ) {
2441 c = Py_CHARMASK(*input++);
2442 if (trans_table[c] != -1)
2443 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2444 continue;
2445 changed = 1;
2446 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002447 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002448 Py_DECREF(result);
2449 Py_INCREF(input_obj);
2450 return input_obj;
2451 }
2452 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002453 if (inlen > 0)
2454 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455 return result;
2456}
2457
2458
/* Values for the `direction` argument of the search helpers below, which
   only test its sign: positive scans left to right, negative scans right
   to left. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Evaluates to a char * pointing at the first byte equal to c within the
   first target_len bytes of target, or to NULL when there is none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), c, target_len))
2466
2467/* String ops must return a string. */
2468/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002469Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002470return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002471{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002472 if (PyString_CheckExact(self)) {
2473 Py_INCREF(self);
2474 return self;
2475 }
2476 return (PyStringObject *)PyString_FromStringAndSize(
2477 PyString_AS_STRING(self),
2478 PyString_GET_SIZE(self));
2479}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002481Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002482countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002483{
2484 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002485 const char *start=target;
2486 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002487
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002488 while ( (start=findchar(start, end-start, c)) != NULL ) {
2489 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002490 if (count >= maxcount)
2491 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002492 start += 1;
2493 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002494 return count;
2495}
2496
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002497Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002498findstring(const char *target, Py_ssize_t target_len,
2499 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002500 Py_ssize_t start,
2501 Py_ssize_t end,
2502 int direction)
2503{
2504 if (start < 0) {
2505 start += target_len;
2506 if (start < 0)
2507 start = 0;
2508 }
2509 if (end > target_len) {
2510 end = target_len;
2511 } else if (end < 0) {
2512 end += target_len;
2513 if (end < 0)
2514 end = 0;
2515 }
2516
2517 /* zero-length substrings always match at the first attempt */
2518 if (pattern_len == 0)
2519 return (direction > 0) ? start : end;
2520
2521 end -= pattern_len;
2522
2523 if (direction < 0) {
2524 for (; end >= start; end--)
2525 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2526 return end;
2527 } else {
2528 for (; start <= end; start++)
2529 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2530 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002531 }
2532 return -1;
2533}
2534
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002535Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002536countstring(const char *target, Py_ssize_t target_len,
2537 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002538 Py_ssize_t start,
2539 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002540 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002541{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002542 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002543
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002544 if (start < 0) {
2545 start += target_len;
2546 if (start < 0)
2547 start = 0;
2548 }
2549 if (end > target_len) {
2550 end = target_len;
2551 } else if (end < 0) {
2552 end += target_len;
2553 if (end < 0)
2554 end = 0;
2555 }
2556
2557 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002558 if (pattern_len == 0 || maxcount == 0) {
2559 if (target_len+1 < maxcount)
2560 return target_len+1;
2561 return maxcount;
2562 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563
2564 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002565 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002566 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002567 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2568 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002569 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002570 end -= pattern_len-1;
2571 }
2572 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002573 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002574 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2575 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002576 if (--maxcount <= 0)
2577 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002578 start += pattern_len-1;
2579 }
2580 }
2581 return count;
2582}
2583
2584
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002585/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002586
2587/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002588Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002589replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002590 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 Py_ssize_t maxcount)
2592{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002593 char *self_s, *result_s;
2594 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002595 Py_ssize_t count, i, product;
2596 PyStringObject *result;
2597
2598 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002599
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002600 /* 1 at the end plus 1 after every character */
2601 count = self_len+1;
2602 if (maxcount < count)
2603 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002604
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002605 /* Check for overflow */
2606 /* result_len = count * to_len + self_len; */
2607 product = count * to_len;
2608 if (product / to_len != count) {
2609 PyErr_SetString(PyExc_OverflowError,
2610 "replace string is too long");
2611 return NULL;
2612 }
2613 result_len = product + self_len;
2614 if (result_len < 0) {
2615 PyErr_SetString(PyExc_OverflowError,
2616 "replace string is too long");
2617 return NULL;
2618 }
2619
2620 if (! (result = (PyStringObject *)
2621 PyString_FromStringAndSize(NULL, result_len)) )
2622 return NULL;
2623
2624 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002625 result_s = PyString_AS_STRING(result);
2626
2627 /* TODO: special case single character, which doesn't need memcpy */
2628
2629 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002630 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002631 result_s += to_len;
2632 count -= 1;
2633
2634 for (i=0; i<count; i++) {
2635 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002636 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002637 result_s += to_len;
2638 }
2639
2640 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002641 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002642
2643 return result;
2644}
2645
2646/* Special case for deleting a single character */
2647/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002648Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002649replace_delete_single_character(PyStringObject *self,
2650 char from_c, Py_ssize_t maxcount)
2651{
2652 char *self_s, *result_s;
2653 char *start, *next, *end;
2654 Py_ssize_t self_len, result_len;
2655 Py_ssize_t count;
2656 PyStringObject *result;
2657
2658 self_len = PyString_GET_SIZE(self);
2659 self_s = PyString_AS_STRING(self);
2660
Andrew Dalke51324072006-05-26 20:25:22 +00002661 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002662 if (count == 0) {
2663 return return_self(self);
2664 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002665
2666 result_len = self_len - count; /* from_len == 1 */
2667 assert(result_len>=0);
2668
2669 if ( (result = (PyStringObject *)
2670 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2671 return NULL;
2672 result_s = PyString_AS_STRING(result);
2673
2674 start = self_s;
2675 end = self_s + self_len;
2676 while (count-- > 0) {
2677 next = findchar(start, end-start, from_c);
2678 if (next == NULL)
2679 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002680 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002681 result_s += (next-start);
2682 start = next+1;
2683 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002684 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002685
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002686 return result;
2687}
2688
2689/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2690
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002691Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002692replace_delete_substring(PyStringObject *self,
2693 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002694 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002695 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002696 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002697 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002698 Py_ssize_t count, offset;
2699 PyStringObject *result;
2700
2701 self_len = PyString_GET_SIZE(self);
2702 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002703
2704 count = countstring(self_s, self_len,
2705 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002706 0, self_len, 1,
2707 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002708
2709 if (count == 0) {
2710 /* no matches */
2711 return return_self(self);
2712 }
2713
2714 result_len = self_len - (count * from_len);
2715 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002716
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 if ( (result = (PyStringObject *)
2718 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2719 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002720
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002721 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002722
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002723 start = self_s;
2724 end = self_s + self_len;
2725 while (count-- > 0) {
2726 offset = findstring(start, end-start,
2727 from_s, from_len,
2728 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002729 if (offset == -1)
2730 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002732
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002733 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002734
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002735 result_s += (next-start);
2736 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002737 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002738 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002739 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002740}
2741
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002742/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002743Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744replace_single_character_in_place(PyStringObject *self,
2745 char from_c, char to_c,
2746 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002747{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002748 char *self_s, *result_s, *start, *end, *next;
2749 Py_ssize_t self_len;
2750 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002751
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 /* The result string will be the same size */
2753 self_s = PyString_AS_STRING(self);
2754 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002755
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002756 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002757
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 if (next == NULL) {
2759 /* No matches; return the original string */
2760 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002761 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002762
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002763 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002764 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765 if (result == NULL)
2766 return NULL;
2767 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002768 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002769
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002770 /* change everything in-place, starting with this one */
2771 start = result_s + (next-self_s);
2772 *start = to_c;
2773 start++;
2774 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002775
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002776 while (--maxcount > 0) {
2777 next = findchar(start, end-start, from_c);
2778 if (next == NULL)
2779 break;
2780 *next = to_c;
2781 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002782 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002783
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002784 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002785}
2786
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002787/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002788Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002789replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002790 const char *from_s, Py_ssize_t from_len,
2791 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002792 Py_ssize_t maxcount)
2793{
2794 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002795 char *self_s;
2796 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002798
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002800
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002801 self_s = PyString_AS_STRING(self);
2802 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002803
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002804 offset = findstring(self_s, self_len,
2805 from_s, from_len,
2806 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002807 if (offset == -1) {
2808 /* No matches; return the original string */
2809 return return_self(self);
2810 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002811
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002813 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814 if (result == NULL)
2815 return NULL;
2816 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002817 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002818
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 /* change everything in-place, starting with this one */
2820 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002821 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822 start += from_len;
2823 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002824
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002825 while ( --maxcount > 0) {
2826 offset = findstring(start, end-start,
2827 from_s, from_len,
2828 0, end-start, FORWARD);
2829 if (offset==-1)
2830 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002831 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 start += offset+from_len;
2833 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002834
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835 return result;
2836}
2837
2838/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002839Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002840replace_single_character(PyStringObject *self,
2841 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002842 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002843 Py_ssize_t maxcount)
2844{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002845 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002846 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002847 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002848 Py_ssize_t count, product;
2849 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002850
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002851 self_s = PyString_AS_STRING(self);
2852 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002853
Andrew Dalke51324072006-05-26 20:25:22 +00002854 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002855 if (count == 0) {
2856 /* no matches, return unchanged */
2857 return return_self(self);
2858 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002859
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002860 /* use the difference between current and new, hence the "-1" */
2861 /* result_len = self_len + count * (to_len-1) */
2862 product = count * (to_len-1);
2863 if (product / (to_len-1) != count) {
2864 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2865 return NULL;
2866 }
2867 result_len = self_len + product;
2868 if (result_len < 0) {
2869 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2870 return NULL;
2871 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002872
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002873 if ( (result = (PyStringObject *)
2874 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2875 return NULL;
2876 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002877
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002878 start = self_s;
2879 end = self_s + self_len;
2880 while (count-- > 0) {
2881 next = findchar(start, end-start, from_c);
2882 if (next == NULL)
2883 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002884
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002885 if (next == start) {
2886 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002887 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002888 result_s += to_len;
2889 start += 1;
2890 } else {
2891 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002892 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002893 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002894 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895 result_s += to_len;
2896 start = next+1;
2897 }
2898 }
2899 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002900 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002901
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002902 return result;
2903}
2904
2905/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002906Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002907replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002908 const char *from_s, Py_ssize_t from_len,
2909 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002910 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002911 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002913 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002914 Py_ssize_t count, offset, product;
2915 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002916
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002917 self_s = PyString_AS_STRING(self);
2918 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002919
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002920 count = countstring(self_s, self_len,
2921 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002922 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923 if (count == 0) {
2924 /* no matches, return unchanged */
2925 return return_self(self);
2926 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002927
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002928 /* Check for overflow */
2929 /* result_len = self_len + count * (to_len-from_len) */
2930 product = count * (to_len-from_len);
2931 if (product / (to_len-from_len) != count) {
2932 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2933 return NULL;
2934 }
2935 result_len = self_len + product;
2936 if (result_len < 0) {
2937 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2938 return NULL;
2939 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002940
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002941 if ( (result = (PyStringObject *)
2942 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2943 return NULL;
2944 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002945
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002946 start = self_s;
2947 end = self_s + self_len;
2948 while (count-- > 0) {
2949 offset = findstring(start, end-start,
2950 from_s, from_len,
2951 0, end-start, FORWARD);
2952 if (offset == -1)
2953 break;
2954 next = start+offset;
2955 if (next == start) {
2956 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002957 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002958 result_s += to_len;
2959 start += from_len;
2960 } else {
2961 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002962 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002963 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002964 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002965 result_s += to_len;
2966 start = next+from_len;
2967 }
2968 }
2969 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002970 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002971
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002972 return result;
2973}
2974
2975
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002976Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002977replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002978 const char *from_s, Py_ssize_t from_len,
2979 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002980 Py_ssize_t maxcount)
2981{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002982 if (maxcount < 0) {
2983 maxcount = PY_SSIZE_T_MAX;
2984 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2985 /* nothing to do; return the original string */
2986 return return_self(self);
2987 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002988
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002989 if (maxcount == 0 ||
2990 (from_len == 0 && to_len == 0)) {
2991 /* nothing to do; return the original string */
2992 return return_self(self);
2993 }
2994
2995 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002996
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002997 if (from_len == 0) {
2998 /* insert the 'to' string everywhere. */
2999 /* >>> "Python".replace("", ".") */
3000 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003001 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003002 }
3003
3004 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3005 /* point for an empty self string to generate a non-empty string */
3006 /* Special case so the remaining code always gets a non-empty string */
3007 if (PyString_GET_SIZE(self) == 0) {
3008 return return_self(self);
3009 }
3010
3011 if (to_len == 0) {
3012 /* delete all occurances of 'from' string */
3013 if (from_len == 1) {
3014 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003015 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003016 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003017 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003018 }
3019 }
3020
3021 /* Handle special case where both strings have the same length */
3022
3023 if (from_len == to_len) {
3024 if (from_len == 1) {
3025 return replace_single_character_in_place(
3026 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003027 from_s[0],
3028 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003029 maxcount);
3030 } else {
3031 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003032 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003033 }
3034 }
3035
3036 /* Otherwise use the more generic algorithms */
3037 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003038 return replace_single_character(self, from_s[0],
3039 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003040 } else {
3041 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003042 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003043 }
3044}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003045
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003046PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003047"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048\n\
3049Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003050old replaced by new. If the optional argument count is\n\
3051given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003052
3053static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003054string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003055{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003056 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003058 const char *from_s, *to_s;
3059 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003060
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003061 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003062 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003065 from_s = PyString_AS_STRING(from);
3066 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003068#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003069 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003070 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003071 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003072#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003073 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003074 return NULL;
3075
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003076 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003077 to_s = PyString_AS_STRING(to);
3078 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003079 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003080#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003081 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003082 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003083 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003084#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003085 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086 return NULL;
3087
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003088 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003089 from_s, from_len,
3090 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003091}
3092
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003093/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003094
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003095/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003096 * against substr, using the start and end arguments. Returns
3097 * -1 on error, 0 if not found and 1 if found.
3098 */
3099Py_LOCAL(int)
3100_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3101 Py_ssize_t end, int direction)
3102{
3103 Py_ssize_t len = PyString_GET_SIZE(self);
3104 Py_ssize_t slen;
3105 const char* sub;
3106 const char* str;
3107
3108 if (PyString_Check(substr)) {
3109 sub = PyString_AS_STRING(substr);
3110 slen = PyString_GET_SIZE(substr);
3111 }
3112#ifdef Py_USING_UNICODE
3113 else if (PyUnicode_Check(substr))
3114 return PyUnicode_Tailmatch((PyObject *)self,
3115 substr, start, end, direction);
3116#endif
3117 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3118 return -1;
3119 str = PyString_AS_STRING(self);
3120
3121 string_adjust_indices(&start, &end, len);
3122
3123 if (direction < 0) {
3124 /* startswith */
3125 if (start+slen > len)
3126 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003127 } else {
3128 /* endswith */
3129 if (end-start < slen || start > len)
3130 return 0;
3131
3132 if (end-slen > start)
3133 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003134 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003135 if (end-start >= slen)
3136 return ! memcmp(str+start, sub, slen);
3137 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003138}
3139
3140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003141PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003142"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003143\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003144Return True if S starts with the specified prefix, False otherwise.\n\
3145With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003146With optional end, stop comparing S at that position.\n\
3147prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003148
3149static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003150string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003151{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003152 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003153 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003155 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003156
Guido van Rossumc6821402000-05-08 14:08:05 +00003157 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3158 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003160 if (PyTuple_Check(subobj)) {
3161 Py_ssize_t i;
3162 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3163 result = _string_tailmatch(self,
3164 PyTuple_GET_ITEM(subobj, i),
3165 start, end, -1);
3166 if (result == -1)
3167 return NULL;
3168 else if (result) {
3169 Py_RETURN_TRUE;
3170 }
3171 }
3172 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003173 }
Georg Brandl24250812006-06-09 18:45:48 +00003174 result = _string_tailmatch(self, subobj, start, end, -1);
3175 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003176 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003177 else
Georg Brandl24250812006-06-09 18:45:48 +00003178 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179}
3180
3181
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003182PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003183"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003184\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003185Return True if S ends with the specified suffix, False otherwise.\n\
3186With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003187With optional end, stop comparing S at that position.\n\
3188suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003189
3190static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003191string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003192{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003193 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003194 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003196 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003197
Guido van Rossumc6821402000-05-08 14:08:05 +00003198 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3199 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003200 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003201 if (PyTuple_Check(subobj)) {
3202 Py_ssize_t i;
3203 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3204 result = _string_tailmatch(self,
3205 PyTuple_GET_ITEM(subobj, i),
3206 start, end, +1);
3207 if (result == -1)
3208 return NULL;
3209 else if (result) {
3210 Py_RETURN_TRUE;
3211 }
3212 }
3213 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003214 }
Georg Brandl24250812006-06-09 18:45:48 +00003215 result = _string_tailmatch(self, subobj, start, end, +1);
3216 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003217 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003218 else
Georg Brandl24250812006-06-09 18:45:48 +00003219 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003220}
3221
3222
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003223PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003224"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003225\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003226Encodes S using the codec registered for encoding. encoding defaults\n\
3227to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003228handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003229a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3230'xmlcharrefreplace' as well as any other name registered with\n\
3231codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003232
3233static PyObject *
3234string_encode(PyStringObject *self, PyObject *args)
3235{
3236 char *encoding = NULL;
3237 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003238 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003239
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003240 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3241 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003242 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003243 if (v == NULL)
3244 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003245 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3246 PyErr_Format(PyExc_TypeError,
3247 "encoder did not return a string/unicode object "
3248 "(type=%.400s)",
3249 v->ob_type->tp_name);
3250 Py_DECREF(v);
3251 return NULL;
3252 }
3253 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003254
3255 onError:
3256 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003257}
3258
3259
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003260PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003261"S.decode([encoding[,errors]]) -> object\n\
3262\n\
3263Decodes S using the codec registered for encoding. encoding defaults\n\
3264to the default encoding. errors may be given to set a different error\n\
3265handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003266a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3267as well as any other name registerd with codecs.register_error that is\n\
3268able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003269
3270static PyObject *
3271string_decode(PyStringObject *self, PyObject *args)
3272{
3273 char *encoding = NULL;
3274 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003275 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003276
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003277 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3278 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003279 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003280 if (v == NULL)
3281 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003282 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3283 PyErr_Format(PyExc_TypeError,
3284 "decoder did not return a string/unicode object "
3285 "(type=%.400s)",
3286 v->ob_type->tp_name);
3287 Py_DECREF(v);
3288 return NULL;
3289 }
3290 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003291
3292 onError:
3293 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003294}
3295
3296
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003297PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003298"S.expandtabs([tabsize]) -> string\n\
3299\n\
3300Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003301If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003302
3303static PyObject*
3304string_expandtabs(PyStringObject *self, PyObject *args)
3305{
Guido van Rossum44a93e52008-03-11 21:14:54 +00003306 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003307 char *q;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003308 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003309 PyObject *u;
3310 int tabsize = 8;
3311
3312 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3313 return NULL;
3314
Thomas Wouters7e474022000-07-16 12:04:32 +00003315 /* First pass: determine size of output string */
Guido van Rossum44a93e52008-03-11 21:14:54 +00003316 i = 0; /* chars up to and including most recent \n or \r */
3317 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3318 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003319 for (p = PyString_AS_STRING(self); p < e; p++)
3320 if (*p == '\t') {
Neal Norwitz66e64e22007-06-09 04:06:30 +00003321 if (tabsize > 0) {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003322 incr = tabsize - (j % tabsize);
3323 if (j > PY_SSIZE_T_MAX - incr)
3324 goto overflow1;
3325 j += incr;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003326 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003327 }
3328 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003329 if (j > PY_SSIZE_T_MAX - 1)
3330 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003331 j++;
3332 if (*p == '\n' || *p == '\r') {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003333 if (i > PY_SSIZE_T_MAX - j)
3334 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003335 i += j;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003336 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003337 }
3338 }
3339
Guido van Rossum44a93e52008-03-11 21:14:54 +00003340 if (i > PY_SSIZE_T_MAX - j)
3341 goto overflow1;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003342
Guido van Rossum4c08d552000-03-10 22:55:18 +00003343 /* Second pass: create output string and fill it */
3344 u = PyString_FromStringAndSize(NULL, i + j);
3345 if (!u)
3346 return NULL;
3347
Guido van Rossum44a93e52008-03-11 21:14:54 +00003348 j = 0; /* same as in first pass */
3349 q = PyString_AS_STRING(u); /* next output char */
3350 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003351
3352 for (p = PyString_AS_STRING(self); p < e; p++)
3353 if (*p == '\t') {
3354 if (tabsize > 0) {
3355 i = tabsize - (j % tabsize);
3356 j += i;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003357 while (i--) {
3358 if (q >= qe)
3359 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003360 *q++ = ' ';
Guido van Rossum44a93e52008-03-11 21:14:54 +00003361 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003362 }
3363 }
3364 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003365 if (q >= qe)
3366 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003367 *q++ = *p;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003368 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003369 if (*p == '\n' || *p == '\r')
3370 j = 0;
3371 }
3372
3373 return u;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003374
3375 overflow2:
3376 Py_DECREF(u);
3377 overflow1:
3378 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3379 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003380}
3381
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003382Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003383pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384{
3385 PyObject *u;
3386
3387 if (left < 0)
3388 left = 0;
3389 if (right < 0)
3390 right = 0;
3391
Tim Peters8fa5dd02001-09-12 02:18:30 +00003392 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003393 Py_INCREF(self);
3394 return (PyObject *)self;
3395 }
3396
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003397 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398 left + PyString_GET_SIZE(self) + right);
3399 if (u) {
3400 if (left)
3401 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003402 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003403 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003404 PyString_GET_SIZE(self));
3405 if (right)
3406 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3407 fill, right);
3408 }
3409
3410 return u;
3411}
3412
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003413PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003414"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003415"\n"
3416"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003417"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003418
3419static PyObject *
3420string_ljust(PyStringObject *self, PyObject *args)
3421{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003422 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003423 char fillchar = ' ';
3424
Thomas Wouters4abb3662006-04-19 14:50:15 +00003425 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003426 return NULL;
3427
Tim Peters8fa5dd02001-09-12 02:18:30 +00003428 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003429 Py_INCREF(self);
3430 return (PyObject*) self;
3431 }
3432
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003433 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003434}
3435
3436
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003437PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003438"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003439"\n"
3440"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003441"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003442
3443static PyObject *
3444string_rjust(PyStringObject *self, PyObject *args)
3445{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003446 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003447 char fillchar = ' ';
3448
Thomas Wouters4abb3662006-04-19 14:50:15 +00003449 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003450 return NULL;
3451
Tim Peters8fa5dd02001-09-12 02:18:30 +00003452 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003453 Py_INCREF(self);
3454 return (PyObject*) self;
3455 }
3456
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003457 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458}
3459
3460
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003461PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003462"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003463"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003464"Return S centered in a string of length width. Padding is\n"
3465"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466
3467static PyObject *
3468string_center(PyStringObject *self, PyObject *args)
3469{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003470 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003471 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003472 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003473
Thomas Wouters4abb3662006-04-19 14:50:15 +00003474 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003475 return NULL;
3476
Tim Peters8fa5dd02001-09-12 02:18:30 +00003477 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003478 Py_INCREF(self);
3479 return (PyObject*) self;
3480 }
3481
3482 marg = width - PyString_GET_SIZE(self);
3483 left = marg / 2 + (marg & width & 1);
3484
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003485 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003486}
3487
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003488PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003489"S.zfill(width) -> string\n"
3490"\n"
3491"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003492"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003493
3494static PyObject *
3495string_zfill(PyStringObject *self, PyObject *args)
3496{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003497 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003498 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003499 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003500 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003501
Thomas Wouters4abb3662006-04-19 14:50:15 +00003502 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003503 return NULL;
3504
3505 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003506 if (PyString_CheckExact(self)) {
3507 Py_INCREF(self);
3508 return (PyObject*) self;
3509 }
3510 else
3511 return PyString_FromStringAndSize(
3512 PyString_AS_STRING(self),
3513 PyString_GET_SIZE(self)
3514 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003515 }
3516
3517 fill = width - PyString_GET_SIZE(self);
3518
3519 s = pad(self, fill, 0, '0');
3520
3521 if (s == NULL)
3522 return NULL;
3523
3524 p = PyString_AS_STRING(s);
3525 if (p[fill] == '+' || p[fill] == '-') {
3526 /* move sign to beginning of string */
3527 p[0] = p[fill];
3528 p[fill] = '0';
3529 }
3530
3531 return (PyObject*) s;
3532}
3533
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003534PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003535"S.isspace() -> bool\n\
3536\n\
3537Return True if all characters in S are whitespace\n\
3538and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003539
3540static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003541string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542{
Fred Drakeba096332000-07-09 07:04:36 +00003543 register const unsigned char *p
3544 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003545 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546
Guido van Rossum4c08d552000-03-10 22:55:18 +00003547 /* Shortcut for single character strings */
3548 if (PyString_GET_SIZE(self) == 1 &&
3549 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003551
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003552 /* Special case for empty strings */
3553 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003554 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003555
Guido van Rossum4c08d552000-03-10 22:55:18 +00003556 e = p + PyString_GET_SIZE(self);
3557 for (; p < e; p++) {
3558 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003559 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003560 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003561 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003562}
3563
3564
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003565PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003566"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003567\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003568Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003569and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570
3571static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003572string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573{
Fred Drakeba096332000-07-09 07:04:36 +00003574 register const unsigned char *p
3575 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576 register const unsigned char *e;
3577
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003578 /* Shortcut for single character strings */
3579 if (PyString_GET_SIZE(self) == 1 &&
3580 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582
3583 /* Special case for empty strings */
3584 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003586
3587 e = p + PyString_GET_SIZE(self);
3588 for (; p < e; p++) {
3589 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003590 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003591 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003592 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593}
3594
3595
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003596PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003597"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003598\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003599Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003600and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003601
3602static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003603string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003604{
Fred Drakeba096332000-07-09 07:04:36 +00003605 register const unsigned char *p
3606 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003607 register const unsigned char *e;
3608
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003609 /* Shortcut for single character strings */
3610 if (PyString_GET_SIZE(self) == 1 &&
3611 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003613
3614 /* Special case for empty strings */
3615 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003617
3618 e = p + PyString_GET_SIZE(self);
3619 for (; p < e; p++) {
3620 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003622 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003623 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003624}
3625
3626
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003627PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003628"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003630Return True if all characters in S are digits\n\
3631and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632
3633static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003634string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635{
Fred Drakeba096332000-07-09 07:04:36 +00003636 register const unsigned char *p
3637 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003638 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640 /* Shortcut for single character strings */
3641 if (PyString_GET_SIZE(self) == 1 &&
3642 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003645 /* Special case for empty strings */
3646 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003648
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649 e = p + PyString_GET_SIZE(self);
3650 for (; p < e; p++) {
3651 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003652 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003654 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655}
3656
3657
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003658PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003659"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003662at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663
3664static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003665string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666{
Fred Drakeba096332000-07-09 07:04:36 +00003667 register const unsigned char *p
3668 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003669 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670 int cased;
3671
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672 /* Shortcut for single character strings */
3673 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003675
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003676 /* Special case for empty strings */
3677 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003678 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003679
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680 e = p + PyString_GET_SIZE(self);
3681 cased = 0;
3682 for (; p < e; p++) {
3683 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685 else if (!cased && islower(*p))
3686 cased = 1;
3687 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689}
3690
3691
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003692PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003693"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003695Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003696at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697
3698static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003699string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700{
Fred Drakeba096332000-07-09 07:04:36 +00003701 register const unsigned char *p
3702 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003703 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704 int cased;
3705
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706 /* Shortcut for single character strings */
3707 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003710 /* Special case for empty strings */
3711 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003712 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003713
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714 e = p + PyString_GET_SIZE(self);
3715 cased = 0;
3716 for (; p < e; p++) {
3717 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719 else if (!cased && isupper(*p))
3720 cased = 1;
3721 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003722 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003723}
3724
3725
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003726PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003729Return True if S is a titlecased string and there is at least one\n\
3730character in S, i.e. uppercase characters may only follow uncased\n\
3731characters and lowercase characters only cased ones. Return False\n\
3732otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733
3734static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003735string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736{
Fred Drakeba096332000-07-09 07:04:36 +00003737 register const unsigned char *p
3738 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003739 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740 int cased, previous_is_cased;
3741
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742 /* Shortcut for single character strings */
3743 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003744 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003746 /* Special case for empty strings */
3747 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003748 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003749
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750 e = p + PyString_GET_SIZE(self);
3751 cased = 0;
3752 previous_is_cased = 0;
3753 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003754 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755
3756 if (isupper(ch)) {
3757 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003758 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003759 previous_is_cased = 1;
3760 cased = 1;
3761 }
3762 else if (islower(ch)) {
3763 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003764 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765 previous_is_cased = 1;
3766 cased = 1;
3767 }
3768 else
3769 previous_is_cased = 0;
3770 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003771 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772}
3773
3774
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003775PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003776"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777\n\
3778Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003779Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003780is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003781
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782static PyObject*
3783string_splitlines(PyStringObject *self, PyObject *args)
3784{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003785 register Py_ssize_t i;
3786 register Py_ssize_t j;
3787 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003788 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789 PyObject *list;
3790 PyObject *str;
3791 char *data;
3792
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003793 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794 return NULL;
3795
3796 data = PyString_AS_STRING(self);
3797 len = PyString_GET_SIZE(self);
3798
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003799 /* This does not use the preallocated list because splitlines is
3800 usually run with hundreds of newlines. The overhead of
3801 switching between PyList_SET_ITEM and append causes about a
3802 2-3% slowdown for that common case. A smarter implementation
3803 could move the if check out, so the SET_ITEMs are done first
3804 and the appends only done when the prealloc buffer is full.
3805 That's too much work for little gain.*/
3806
Guido van Rossum4c08d552000-03-10 22:55:18 +00003807 list = PyList_New(0);
3808 if (!list)
3809 goto onError;
3810
3811 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003812 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003813
Guido van Rossum4c08d552000-03-10 22:55:18 +00003814 /* Find a line and append it */
3815 while (i < len && data[i] != '\n' && data[i] != '\r')
3816 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817
3818 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003819 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820 if (i < len) {
3821 if (data[i] == '\r' && i + 1 < len &&
3822 data[i+1] == '\n')
3823 i += 2;
3824 else
3825 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003826 if (keepends)
3827 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003828 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003829 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830 j = i;
3831 }
3832 if (j < len) {
3833 SPLIT_APPEND(data, j, len);
3834 }
3835
3836 return list;
3837
3838 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003839 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003840 return NULL;
3841}
3842
3843#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003844#undef SPLIT_ADD
3845#undef MAX_PREALLOC
3846#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003848static PyObject *
3849string_getnewargs(PyStringObject *v)
3850{
3851 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3852}
3853
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003854
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003855static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003856string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003857 /* Counterparts of the obsolete stropmodule functions; except
3858 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003859 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3860 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003861 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003862 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3863 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003864 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3865 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3866 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3867 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3868 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3869 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3870 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003871 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3872 capitalize__doc__},
3873 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3874 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3875 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003876 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003877 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3878 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3879 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3880 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3881 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3882 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3883 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003884 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3885 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003886 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3887 startswith__doc__},
3888 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3889 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3890 swapcase__doc__},
3891 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3892 translate__doc__},
3893 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3894 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3895 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3896 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3897 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3898 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3899 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3900 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3901 expandtabs__doc__},
3902 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3903 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003904 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003905 {NULL, NULL} /* sentinel */
3906};
3907
Jeremy Hylton938ace62002-07-17 16:30:39 +00003908static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003909str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3910
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003911static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003912string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003913{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003914 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003915 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003916
Guido van Rossumae960af2001-08-30 03:11:59 +00003917 if (type != &PyString_Type)
3918 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003919 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3920 return NULL;
3921 if (x == NULL)
3922 return PyString_FromString("");
3923 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003924}
3925
Guido van Rossumae960af2001-08-30 03:11:59 +00003926static PyObject *
3927str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3928{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003929 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003930 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003931
3932 assert(PyType_IsSubtype(type, &PyString_Type));
3933 tmp = string_new(&PyString_Type, args, kwds);
3934 if (tmp == NULL)
3935 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003936 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003937 n = PyString_GET_SIZE(tmp);
3938 pnew = type->tp_alloc(type, n);
3939 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003940 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003941 ((PyStringObject *)pnew)->ob_shash =
3942 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003943 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003944 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003945 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003946 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003947}
3948
Guido van Rossumcacfc072002-05-24 19:01:59 +00003949static PyObject *
3950basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3951{
3952 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003953 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003954 return NULL;
3955}
3956
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003957static PyObject *
3958string_mod(PyObject *v, PyObject *w)
3959{
3960 if (!PyString_Check(v)) {
3961 Py_INCREF(Py_NotImplemented);
3962 return Py_NotImplemented;
3963 }
3964 return PyString_Format(v, w);
3965}
3966
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003967PyDoc_STRVAR(basestring_doc,
3968"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003969
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003970static PyNumberMethods string_as_number = {
3971 0, /*nb_add*/
3972 0, /*nb_subtract*/
3973 0, /*nb_multiply*/
3974 0, /*nb_divide*/
3975 string_mod, /*nb_remainder*/
3976};
3977
3978
Guido van Rossumcacfc072002-05-24 19:01:59 +00003979PyTypeObject PyBaseString_Type = {
3980 PyObject_HEAD_INIT(&PyType_Type)
3981 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003982 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003983 0,
3984 0,
3985 0, /* tp_dealloc */
3986 0, /* tp_print */
3987 0, /* tp_getattr */
3988 0, /* tp_setattr */
3989 0, /* tp_compare */
3990 0, /* tp_repr */
3991 0, /* tp_as_number */
3992 0, /* tp_as_sequence */
3993 0, /* tp_as_mapping */
3994 0, /* tp_hash */
3995 0, /* tp_call */
3996 0, /* tp_str */
3997 0, /* tp_getattro */
3998 0, /* tp_setattro */
3999 0, /* tp_as_buffer */
4000 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4001 basestring_doc, /* tp_doc */
4002 0, /* tp_traverse */
4003 0, /* tp_clear */
4004 0, /* tp_richcompare */
4005 0, /* tp_weaklistoffset */
4006 0, /* tp_iter */
4007 0, /* tp_iternext */
4008 0, /* tp_methods */
4009 0, /* tp_members */
4010 0, /* tp_getset */
4011 &PyBaseObject_Type, /* tp_base */
4012 0, /* tp_dict */
4013 0, /* tp_descr_get */
4014 0, /* tp_descr_set */
4015 0, /* tp_dictoffset */
4016 0, /* tp_init */
4017 0, /* tp_alloc */
4018 basestring_new, /* tp_new */
4019 0, /* tp_free */
4020};
4021
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004022PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004023"str(object) -> string\n\
4024\n\
4025Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004026If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004027
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004028PyTypeObject PyString_Type = {
4029 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004030 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004031 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004032 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004033 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004034 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004035 (printfunc)string_print, /* tp_print */
4036 0, /* tp_getattr */
4037 0, /* tp_setattr */
4038 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004039 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004040 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004041 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004042 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004043 (hashfunc)string_hash, /* tp_hash */
4044 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004045 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004046 PyObject_GenericGetAttr, /* tp_getattro */
4047 0, /* tp_setattro */
4048 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004049 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004050 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004051 string_doc, /* tp_doc */
4052 0, /* tp_traverse */
4053 0, /* tp_clear */
4054 (richcmpfunc)string_richcompare, /* tp_richcompare */
4055 0, /* tp_weaklistoffset */
4056 0, /* tp_iter */
4057 0, /* tp_iternext */
4058 string_methods, /* tp_methods */
4059 0, /* tp_members */
4060 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004061 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004062 0, /* tp_dict */
4063 0, /* tp_descr_get */
4064 0, /* tp_descr_set */
4065 0, /* tp_dictoffset */
4066 0, /* tp_init */
4067 0, /* tp_alloc */
4068 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004069 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004070};
4071
4072void
Fred Drakeba096332000-07-09 07:04:36 +00004073PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004074{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004075 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004076 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004077 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004078 if (w == NULL || !PyString_Check(*pv)) {
4079 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004080 *pv = NULL;
4081 return;
4082 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 v = string_concat((PyStringObject *) *pv, w);
4084 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004085 *pv = v;
4086}
4087
Guido van Rossum013142a1994-08-30 08:19:36 +00004088void
Fred Drakeba096332000-07-09 07:04:36 +00004089PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004090{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004091 PyString_Concat(pv, w);
4092 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004093}
4094
4095
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004096/* The following function breaks the notion that strings are immutable:
4097 it changes the size of a string. We get away with this only if there
4098 is only one module referencing the object. You can also think of it
4099 as creating a new string object and destroying the old one, only
4100 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004101 already be known to some other part of the code...
4102 Note that if there's not enough memory to resize the string, the original
4103 string object at *pv is deallocated, *pv is set to NULL, an "out of
4104 memory" exception is set, and -1 is returned. Else (on success) 0 is
4105 returned, and the value in *pv may or may not be the same as on input.
4106 As always, an extra byte is allocated for a trailing \0 byte (newsize
4107 does *not* include that), and a trailing \0 byte is stored.
4108*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004109
4110int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004111_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004112{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004113 register PyObject *v;
4114 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004115 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004116 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4117 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004118 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004119 Py_DECREF(v);
4120 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004121 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004122 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004123 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004124 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004125 _Py_ForgetReference(v);
4126 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004127 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004128 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004129 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004130 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004131 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004132 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004133 _Py_NewReference(*pv);
4134 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004135 sv->ob_size = newsize;
4136 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004137 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004138 return 0;
4139}
Guido van Rossume5372401993-03-16 12:15:04 +00004140
4141/* Helpers for formatstring */
4142
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004143Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004144getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004145{
Thomas Wouters977485d2006-02-16 15:59:12 +00004146 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004147 if (argidx < arglen) {
4148 (*p_argidx)++;
4149 if (arglen < 0)
4150 return args;
4151 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004152 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004153 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004154 PyErr_SetString(PyExc_TypeError,
4155 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004156 return NULL;
4157}
4158
/* Format flags: one bit per flag character that may follow the '%' of a
 * format specifier.
 */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */

4171
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004172Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004173formatfloat(char *buf, size_t buflen, int flags,
4174 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004175{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004176 /* fmt = '%#.' + `prec` + `type`
4177 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004178 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004179 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004180 x = PyFloat_AsDouble(v);
4181 if (x == -1.0 && PyErr_Occurred()) {
4182 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004183 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004184 }
Guido van Rossume5372401993-03-16 12:15:04 +00004185 if (prec < 0)
4186 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004187 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4188 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004189 /* Worst case length calc to ensure no buffer overrun:
4190
4191 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004192 fmt = %#.<prec>g
4193 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004194 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004195 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004196
4197 'f' formats:
4198 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4199 len = 1 + 50 + 1 + prec = 52 + prec
4200
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004201 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004202 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004203
4204 */
Georg Brandlc5db9232007-07-12 08:38:04 +00004205 if (((type == 'g' || type == 'G') &&
4206 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004207 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004208 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004209 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004210 return -1;
4211 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004212 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4213 (flags&F_ALT) ? "#" : "",
4214 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004215 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004216 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004217}
4218
Tim Peters38fd5b62000-09-21 05:43:11 +00004219/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4220 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4221 * Python's regular ints.
4222 * Return value: a new PyString*, or NULL if error.
4223 * . *pbuf is set to point into it,
4224 * *plen set to the # of chars following that.
4225 * Caller must decref it when done using pbuf.
4226 * The string starting at *pbuf is of the form
4227 * "-"? ("0x" | "0X")? digit+
4228 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004229 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004230 * There will be at least prec digits, zero-filled on the left if
4231 * necessary to get that many.
4232 * val object to be converted
4233 * flags bitmask of format flags; only F_ALT is looked at
4234 * prec minimum number of digits; 0-fill on left if needed
4235 * type a character in [duoxX]; u acts the same as d
4236 *
4237 * CAUTION: o, x and X conversions on regular ints can never
4238 * produce a '-' sign, but can for Python's unbounded ints.
4239 */
4240PyObject*
4241_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4242 char **pbuf, int *plen)
4243{
4244 PyObject *result = NULL;
4245 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004246 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004247 int sign; /* 1 if '-', else 0 */
4248 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004249 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004250 int numdigits; /* len == numnondigits + numdigits */
4251 int numnondigits = 0;
4252
4253 switch (type) {
4254 case 'd':
4255 case 'u':
4256 result = val->ob_type->tp_str(val);
4257 break;
4258 case 'o':
4259 result = val->ob_type->tp_as_number->nb_oct(val);
4260 break;
4261 case 'x':
4262 case 'X':
4263 numnondigits = 2;
4264 result = val->ob_type->tp_as_number->nb_hex(val);
4265 break;
4266 default:
4267 assert(!"'type' not in [duoxX]");
4268 }
4269 if (!result)
4270 return NULL;
4271
Neal Norwitz56423e52006-08-13 18:11:08 +00004272 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004273 if (!buf) {
4274 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004275 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004276 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004277
Tim Peters38fd5b62000-09-21 05:43:11 +00004278 /* To modify the string in-place, there can only be one reference. */
4279 if (result->ob_refcnt != 1) {
4280 PyErr_BadInternalCall();
4281 return NULL;
4282 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004283 llen = PyString_Size(result);
Armin Rigo4b63c212006-10-04 11:44:06 +00004284 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004285 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4286 return NULL;
4287 }
4288 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004289 if (buf[len-1] == 'L') {
4290 --len;
4291 buf[len] = '\0';
4292 }
4293 sign = buf[0] == '-';
4294 numnondigits += sign;
4295 numdigits = len - numnondigits;
4296 assert(numdigits > 0);
4297
Tim Petersfff53252001-04-12 18:38:48 +00004298 /* Get rid of base marker unless F_ALT */
4299 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004300 /* Need to skip 0x, 0X or 0. */
4301 int skipped = 0;
4302 switch (type) {
4303 case 'o':
4304 assert(buf[sign] == '0');
4305 /* If 0 is only digit, leave it alone. */
4306 if (numdigits > 1) {
4307 skipped = 1;
4308 --numdigits;
4309 }
4310 break;
4311 case 'x':
4312 case 'X':
4313 assert(buf[sign] == '0');
4314 assert(buf[sign + 1] == 'x');
4315 skipped = 2;
4316 numnondigits -= 2;
4317 break;
4318 }
4319 if (skipped) {
4320 buf += skipped;
4321 len -= skipped;
4322 if (sign)
4323 buf[0] = '-';
4324 }
4325 assert(len == numnondigits + numdigits);
4326 assert(numdigits > 0);
4327 }
4328
4329 /* Fill with leading zeroes to meet minimum width. */
4330 if (prec > numdigits) {
4331 PyObject *r1 = PyString_FromStringAndSize(NULL,
4332 numnondigits + prec);
4333 char *b1;
4334 if (!r1) {
4335 Py_DECREF(result);
4336 return NULL;
4337 }
4338 b1 = PyString_AS_STRING(r1);
4339 for (i = 0; i < numnondigits; ++i)
4340 *b1++ = *buf++;
4341 for (i = 0; i < prec - numdigits; i++)
4342 *b1++ = '0';
4343 for (i = 0; i < numdigits; i++)
4344 *b1++ = *buf++;
4345 *b1 = '\0';
4346 Py_DECREF(result);
4347 result = r1;
4348 buf = PyString_AS_STRING(result);
4349 len = numnondigits + prec;
4350 }
4351
4352 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004353 if (type == 'X') {
4354 /* Need to convert all lower case letters to upper case.
4355 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004356 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004357 if (buf[i] >= 'a' && buf[i] <= 'x')
4358 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004359 }
4360 *pbuf = buf;
4361 *plen = len;
4362 return result;
4363}
4364
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004365Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004366formatint(char *buf, size_t buflen, int flags,
4367 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004368{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004369 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004370 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4371 + 1 + 1 = 24 */
4372 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004373 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004374 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004375
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004376 x = PyInt_AsLong(v);
4377 if (x == -1 && PyErr_Occurred()) {
4378 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004379 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004380 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004381 if (x < 0 && type == 'u') {
4382 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004383 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004384 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4385 sign = "-";
4386 else
4387 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004388 if (prec < 0)
4389 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004390
4391 if ((flags & F_ALT) &&
4392 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004393 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004394 * of issues that cause pain:
4395 * - when 0 is being converted, the C standard leaves off
4396 * the '0x' or '0X', which is inconsistent with other
4397 * %#x/%#X conversions and inconsistent with Python's
4398 * hex() function
4399 * - there are platforms that violate the standard and
4400 * convert 0 with the '0x' or '0X'
4401 * (Metrowerks, Compaq Tru64)
4402 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004403 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004404 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004405 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004406 * We can achieve the desired consistency by inserting our
4407 * own '0x' or '0X' prefix, and substituting %x/%X in place
4408 * of %#x/%#X.
4409 *
4410 * Note that this is the same approach as used in
4411 * formatint() in unicodeobject.c
4412 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004413 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4414 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004415 }
4416 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004417 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4418 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004419 prec, type);
4420 }
4421
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004422 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4423 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004424 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004425 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004426 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004427 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004428 return -1;
4429 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004430 if (sign[0])
4431 PyOS_snprintf(buf, buflen, fmt, -x);
4432 else
4433 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004434 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004435}
4436
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004437Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004438formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004439{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004440 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004441 if (PyString_Check(v)) {
4442 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004443 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004444 }
4445 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004446 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004447 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004448 }
4449 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004450 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004451}
4452
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004462
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004463PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004464PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004465{
4466 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004467 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004468 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004469 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004470 PyObject *result, *orig_args;
4471#ifdef Py_USING_UNICODE
4472 PyObject *v, *w;
4473#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004474 PyObject *dict = NULL;
4475 if (format == NULL || !PyString_Check(format) || args == NULL) {
4476 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004477 return NULL;
4478 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004479 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004480 fmt = PyString_AS_STRING(format);
4481 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004482 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004483 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004484 if (result == NULL)
4485 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004486 res = PyString_AsString(result);
4487 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004488 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004489 argidx = 0;
4490 }
4491 else {
4492 arglen = -1;
4493 argidx = -2;
4494 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004495 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4496 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004497 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004498 while (--fmtcnt >= 0) {
4499 if (*fmt != '%') {
4500 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004501 rescnt = fmtcnt + 100;
4502 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004503 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004504 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004505 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004506 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004507 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004508 }
4509 *res++ = *fmt++;
4510 }
4511 else {
4512 /* Got a format specifier */
4513 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004514 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004515 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004516 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004517 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004518 PyObject *v = NULL;
4519 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004520 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004521 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004522 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004523 char formatbuf[FORMATBUFLEN];
4524 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004525#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004526 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004527 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004528#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004529
Guido van Rossumda9c2711996-12-05 21:58:58 +00004530 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004531 if (*fmt == '(') {
4532 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004533 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004534 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004535 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004536
4537 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004538 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004539 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004540 goto error;
4541 }
4542 ++fmt;
4543 --fmtcnt;
4544 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004545 /* Skip over balanced parentheses */
4546 while (pcount > 0 && --fmtcnt >= 0) {
4547 if (*fmt == ')')
4548 --pcount;
4549 else if (*fmt == '(')
4550 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004551 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004552 }
4553 keylen = fmt - keystart - 1;
4554 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004555 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004556 "incomplete format key");
4557 goto error;
4558 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004559 key = PyString_FromStringAndSize(keystart,
4560 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004561 if (key == NULL)
4562 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004563 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004564 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004565 args_owned = 0;
4566 }
4567 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004568 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004569 if (args == NULL) {
4570 goto error;
4571 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004572 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004573 arglen = -1;
4574 argidx = -2;
4575 }
Guido van Rossume5372401993-03-16 12:15:04 +00004576 while (--fmtcnt >= 0) {
4577 switch (c = *fmt++) {
4578 case '-': flags |= F_LJUST; continue;
4579 case '+': flags |= F_SIGN; continue;
4580 case ' ': flags |= F_BLANK; continue;
4581 case '#': flags |= F_ALT; continue;
4582 case '0': flags |= F_ZERO; continue;
4583 }
4584 break;
4585 }
4586 if (c == '*') {
4587 v = getnextarg(args, arglen, &argidx);
4588 if (v == NULL)
4589 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004590 if (!PyInt_Check(v)) {
4591 PyErr_SetString(PyExc_TypeError,
4592 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004593 goto error;
4594 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004595 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004596 if (width < 0) {
4597 flags |= F_LJUST;
4598 width = -width;
4599 }
Guido van Rossume5372401993-03-16 12:15:04 +00004600 if (--fmtcnt >= 0)
4601 c = *fmt++;
4602 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004603 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004604 width = c - '0';
4605 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004606 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004607 if (!isdigit(c))
4608 break;
4609 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004610 PyErr_SetString(
4611 PyExc_ValueError,
4612 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004613 goto error;
4614 }
4615 width = width*10 + (c - '0');
4616 }
4617 }
4618 if (c == '.') {
4619 prec = 0;
4620 if (--fmtcnt >= 0)
4621 c = *fmt++;
4622 if (c == '*') {
4623 v = getnextarg(args, arglen, &argidx);
4624 if (v == NULL)
4625 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004626 if (!PyInt_Check(v)) {
4627 PyErr_SetString(
4628 PyExc_TypeError,
4629 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004630 goto error;
4631 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004632 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004633 if (prec < 0)
4634 prec = 0;
4635 if (--fmtcnt >= 0)
4636 c = *fmt++;
4637 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004638 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004639 prec = c - '0';
4640 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004641 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004642 if (!isdigit(c))
4643 break;
4644 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004645 PyErr_SetString(
4646 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004647 "prec too big");
4648 goto error;
4649 }
4650 prec = prec*10 + (c - '0');
4651 }
4652 }
4653 } /* prec */
4654 if (fmtcnt >= 0) {
4655 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004656 if (--fmtcnt >= 0)
4657 c = *fmt++;
4658 }
4659 }
4660 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004661 PyErr_SetString(PyExc_ValueError,
4662 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004663 goto error;
4664 }
4665 if (c != '%') {
4666 v = getnextarg(args, arglen, &argidx);
4667 if (v == NULL)
4668 goto error;
4669 }
4670 sign = 0;
4671 fill = ' ';
4672 switch (c) {
4673 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004674 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004675 len = 1;
4676 break;
4677 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004678#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004679 if (PyUnicode_Check(v)) {
4680 fmt = fmt_start;
4681 argidx = argidx_start;
4682 goto unicode;
4683 }
Georg Brandld45014b2005-10-01 17:06:00 +00004684#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004685 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004686#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004687 if (temp != NULL && PyUnicode_Check(temp)) {
4688 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004689 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004690 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004691 goto unicode;
4692 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004693#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004694 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004695 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004696 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004697 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004698 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004699 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004700 if (!PyString_Check(temp)) {
4701 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004702 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004703 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004704 goto error;
4705 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004706 pbuf = PyString_AS_STRING(temp);
4707 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004708 if (prec >= 0 && len > prec)
4709 len = prec;
4710 break;
4711 case 'i':
4712 case 'd':
4713 case 'u':
4714 case 'o':
4715 case 'x':
4716 case 'X':
4717 if (c == 'i')
4718 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004719 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004720 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004721 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004722 prec, c, &pbuf, &ilen);
4723 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004724 if (!temp)
4725 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004726 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004727 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004728 else {
4729 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004730 len = formatint(pbuf,
4731 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004732 flags, prec, c, v);
4733 if (len < 0)
4734 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004735 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004736 }
4737 if (flags & F_ZERO)
4738 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004739 break;
4740 case 'e':
4741 case 'E':
4742 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004743 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004744 case 'g':
4745 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004746 if (c == 'F')
4747 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004748 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004749 len = formatfloat(pbuf, sizeof(formatbuf),
4750 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004751 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004752 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004753 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004754 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004755 fill = '0';
4756 break;
4757 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004758#ifdef Py_USING_UNICODE
4759 if (PyUnicode_Check(v)) {
4760 fmt = fmt_start;
4761 argidx = argidx_start;
4762 goto unicode;
4763 }
4764#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004765 pbuf = formatbuf;
4766 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004767 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004768 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004769 break;
4770 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004771 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004772 "unsupported format character '%c' (0x%x) "
Armin Rigo4b63c212006-10-04 11:44:06 +00004773 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004774 c, c,
Armin Rigo4b63c212006-10-04 11:44:06 +00004775 (Py_ssize_t)(fmt - 1 -
4776 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004777 goto error;
4778 }
4779 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004780 if (*pbuf == '-' || *pbuf == '+') {
4781 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004782 len--;
4783 }
4784 else if (flags & F_SIGN)
4785 sign = '+';
4786 else if (flags & F_BLANK)
4787 sign = ' ';
4788 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004789 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004790 }
4791 if (width < len)
4792 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004793 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004794 reslen -= rescnt;
4795 rescnt = width + fmtcnt + 100;
4796 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004797 if (reslen < 0) {
4798 Py_DECREF(result);
Georg Brandl5f795862007-02-26 13:51:34 +00004799 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004800 return PyErr_NoMemory();
4801 }
Georg Brandl5f795862007-02-26 13:51:34 +00004802 if (_PyString_Resize(&result, reslen) < 0) {
4803 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004804 return NULL;
Georg Brandl5f795862007-02-26 13:51:34 +00004805 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004806 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004807 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004808 }
4809 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004810 if (fill != ' ')
4811 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004812 rescnt--;
4813 if (width > len)
4814 width--;
4815 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004816 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4817 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004818 assert(pbuf[1] == c);
4819 if (fill != ' ') {
4820 *res++ = *pbuf++;
4821 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004822 }
Tim Petersfff53252001-04-12 18:38:48 +00004823 rescnt -= 2;
4824 width -= 2;
4825 if (width < 0)
4826 width = 0;
4827 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004828 }
4829 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004830 do {
4831 --rescnt;
4832 *res++ = fill;
4833 } while (--width > len);
4834 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004835 if (fill == ' ') {
4836 if (sign)
4837 *res++ = sign;
4838 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004839 (c == 'x' || c == 'X')) {
4840 assert(pbuf[0] == '0');
4841 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004842 *res++ = *pbuf++;
4843 *res++ = *pbuf++;
4844 }
4845 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004846 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004847 res += len;
4848 rescnt -= len;
4849 while (--width >= len) {
4850 --rescnt;
4851 *res++ = ' ';
4852 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004853 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004854 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004855 "not all arguments converted during string formatting");
Georg Brandl5f795862007-02-26 13:51:34 +00004856 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004857 goto error;
4858 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004859 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004860 } /* '%' */
4861 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004862 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004863 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004864 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004865 goto error;
4866 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004867 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004868 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004869 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004870 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004871 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004872
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004873#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004874 unicode:
4875 if (args_owned) {
4876 Py_DECREF(args);
4877 args_owned = 0;
4878 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004879 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004880 if (PyTuple_Check(orig_args) && argidx > 0) {
4881 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004882 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004883 v = PyTuple_New(n);
4884 if (v == NULL)
4885 goto error;
4886 while (--n >= 0) {
4887 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4888 Py_INCREF(w);
4889 PyTuple_SET_ITEM(v, n, w);
4890 }
4891 args = v;
4892 } else {
4893 Py_INCREF(orig_args);
4894 args = orig_args;
4895 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004896 args_owned = 1;
4897 /* Take what we have of the result and let the Unicode formatting
4898 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004899 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004900 if (_PyString_Resize(&result, rescnt))
4901 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004902 fmtcnt = PyString_GET_SIZE(format) - \
4903 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004904 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4905 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004906 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004907 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004908 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004909 if (v == NULL)
4910 goto error;
4911 /* Paste what we have (result) to what the Unicode formatting
4912 function returned (v) and return the result (or error) */
4913 w = PyUnicode_Concat(result, v);
4914 Py_DECREF(result);
4915 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004916 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004917 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004918#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004919
Guido van Rossume5372401993-03-16 12:15:04 +00004920 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004921 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004922 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004923 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004924 }
Guido van Rossume5372401993-03-16 12:15:04 +00004925 return NULL;
4926}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004927
Guido van Rossum2a61e741997-01-18 07:55:05 +00004928void
Fred Drakeba096332000-07-09 07:04:36 +00004929PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004930{
4931 register PyStringObject *s = (PyStringObject *)(*p);
4932 PyObject *t;
4933 if (s == NULL || !PyString_Check(s))
4934 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004935 /* If it's a string subclass, we don't really know what putting
4936 it in the interned dict might do. */
4937 if (!PyString_CheckExact(s))
4938 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004939 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004940 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004941 if (interned == NULL) {
4942 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004943 if (interned == NULL) {
4944 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004945 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004946 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004948 t = PyDict_GetItem(interned, (PyObject *)s);
4949 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004950 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004951 Py_DECREF(*p);
4952 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004953 return;
4954 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004955
Armin Rigo79f7ad22004-08-07 19:27:39 +00004956 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004957 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004958 return;
4959 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004960 /* The two references in interned are not counted by refcnt.
4961 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004962 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004963 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004964}
4965
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004966void
4967PyString_InternImmortal(PyObject **p)
4968{
4969 PyString_InternInPlace(p);
4970 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4971 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4972 Py_INCREF(*p);
4973 }
4974}
4975
Guido van Rossum2a61e741997-01-18 07:55:05 +00004976
4977PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004978PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004979{
4980 PyObject *s = PyString_FromString(cp);
4981 if (s == NULL)
4982 return NULL;
4983 PyString_InternInPlace(&s);
4984 return s;
4985}
4986
Guido van Rossum8cf04761997-08-02 02:57:45 +00004987void
Fred Drakeba096332000-07-09 07:04:36 +00004988PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004989{
4990 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004991 for (i = 0; i < UCHAR_MAX + 1; i++) {
4992 Py_XDECREF(characters[i]);
4993 characters[i] = NULL;
4994 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004995 Py_XDECREF(nullstring);
4996 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004997}
Barry Warsawa903ad982001-02-23 16:40:48 +00004998
Barry Warsawa903ad982001-02-23 16:40:48 +00004999void _Py_ReleaseInternedStrings(void)
5000{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005001 PyObject *keys;
5002 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005003 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005004
5005 if (interned == NULL || !PyDict_Check(interned))
5006 return;
5007 keys = PyDict_Keys(interned);
5008 if (keys == NULL || !PyList_Check(keys)) {
5009 PyErr_Clear();
5010 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005011 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005012
5013 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5014 detector, interned strings are not forcibly deallocated; rather, we
5015 give them their stolen references back, and then clear and DECREF
5016 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005017
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005018 fprintf(stderr, "releasing interned strings\n");
5019 n = PyList_GET_SIZE(keys);
5020 for (i = 0; i < n; i++) {
5021 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5022 switch (s->ob_sstate) {
5023 case SSTATE_NOT_INTERNED:
5024 /* XXX Shouldn't happen */
5025 break;
5026 case SSTATE_INTERNED_IMMORTAL:
5027 s->ob_refcnt += 1;
5028 break;
5029 case SSTATE_INTERNED_MORTAL:
5030 s->ob_refcnt += 2;
5031 break;
5032 default:
5033 Py_FatalError("Inconsistent interned string state.");
5034 }
5035 s->ob_sstate = SSTATE_NOT_INTERNED;
5036 }
5037 Py_DECREF(keys);
5038 PyDict_Clear(interned);
5039 Py_DECREF(interned);
5040 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005041}