blob: 5ae2ca06c06e2e63647e62d79424f8e1f01d5f88 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000773
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000774#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000778
Fredrik Lundhb9479482006-05-26 17:22:38 +0000779#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Armin Rigo4b63c212006-10-04 11:44:06 +0000807 char *data = op->ob_sval;
808 Py_ssize_t size = op->ob_size;
809 while (size > INT_MAX) {
810 /* Very long strings cannot be written atomically.
811 * But don't write exactly INT_MAX bytes at a time
812 * to avoid memory aligment issues.
813 */
814 const int chunk_size = INT_MAX & ~0x3FFF;
815 fwrite(data, 1, chunk_size, fp);
816 data += chunk_size;
817 size -= chunk_size;
818 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000819#ifdef __VMS
Armin Rigo4b63c212006-10-04 11:44:06 +0000820 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#else
Armin Rigo4b63c212006-10-04 11:44:06 +0000822 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000824 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826
Thomas Wouters7e474022000-07-16 12:04:32 +0000827 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000829 if (memchr(op->ob_sval, '\'', op->ob_size) &&
830 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 quote = '"';
832
833 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 for (i = 0; i < op->ob_size; i++) {
835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000837 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\r");
844 else if (c < ' ' || c >= 0x7f)
845 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000850 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851}
852
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000853PyObject *
854PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000856 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000857 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000858 PyObject *v;
Armin Rigo4b63c212006-10-04 11:44:06 +0000859 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyErr_SetString(PyExc_OverflowError,
861 "string is too large to make repr");
862 }
863 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000865 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 }
867 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000868 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 register char c;
870 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000871 int quote;
872
Thomas Wouters7e474022000-07-16 12:04:32 +0000873 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000875 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000876 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000877 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000878 quote = '"';
879
Tim Peters9161c8b2001-12-03 01:55:38 +0000880 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 /* There's at least enough room for a hex escape
884 and a closing quote. */
885 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000889 else if (c == '\t')
890 *p++ = '\\', *p++ = 't';
891 else if (c == '\n')
892 *p++ = '\\', *p++ = 'n';
893 else if (c == '\r')
894 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000895 else if (c < ' ' || c >= 0x7f) {
896 /* For performance, we don't want to call
897 PyOS_snprintf here (extra layers of
898 function call). */
899 sprintf(p, "\\x%02x", c & 0xff);
900 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000901 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000902 else
903 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000905 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000906 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000909 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000910 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912}
913
Guido van Rossum189f1df2001-05-01 16:51:53 +0000914static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000915string_repr(PyObject *op)
916{
917 return PyString_Repr(op, 1);
918}
919
920static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921string_str(PyObject *s)
922{
Tim Petersc9933152001-10-16 20:18:24 +0000923 assert(PyString_Check(s));
924 if (PyString_CheckExact(s)) {
925 Py_INCREF(s);
926 return s;
927 }
928 else {
929 /* Subtype -- return genuine string with the same value. */
930 PyStringObject *t = (PyStringObject *) s;
931 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
932 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000933}
934
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000936string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
938 return a->ob_size;
939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000942string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Andrew Dalke598710c2006-05-25 18:18:39 +0000944 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 register PyStringObject *op;
946 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 if (PyUnicode_Check(bb))
949 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000951 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000952 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000953 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 return NULL;
955 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000956#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000958 if ((a->ob_size == 0 || b->ob_size == 0) &&
959 PyString_CheckExact(a) && PyString_CheckExact(b)) {
960 if (a->ob_size == 0) {
961 Py_INCREF(bb);
962 return bb;
963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 Py_INCREF(a);
965 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966 }
967 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000968 if (size < 0) {
969 PyErr_SetString(PyExc_OverflowError,
970 "strings are too large to concat");
971 return NULL;
972 }
973
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000974 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000975 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000976 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000978 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000979 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000980 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000981 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
982 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000983 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985#undef b
986}
987
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000989string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000991 register Py_ssize_t i;
992 register Py_ssize_t j;
993 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000995 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 if (n < 0)
997 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000998 /* watch out for overflows: the size can overflow int,
999 * and the # of bytes needed can overflow size_t
1000 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001002 if (n && size / n != a->ob_size) {
1003 PyErr_SetString(PyExc_OverflowError,
1004 "repeated string is too long");
1005 return NULL;
1006 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001007 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 Py_INCREF(a);
1009 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010 }
Tim Peterse7c05322004-06-27 17:24:49 +00001011 nbytes = (size_t)size;
1012 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001013 PyErr_SetString(PyExc_OverflowError,
1014 "repeated string is too long");
1015 return NULL;
1016 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001017 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001018 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001019 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001021 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001022 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001023 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001024 op->ob_sval[size] = '\0';
1025 if (a->ob_size == 1 && n > 0) {
1026 memset(op->ob_sval, a->ob_sval[0] , n);
1027 return (PyObject *) op;
1028 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001029 i = 0;
1030 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001031 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001032 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001033 }
1034 while (i < size) {
1035 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001036 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001037 i += j;
1038 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040}
1041
1042/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1043
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001044static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001045string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001046 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001047 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048{
1049 if (i < 0)
1050 i = 0;
1051 if (j < 0)
1052 j = 0; /* Avoid signed/unsigned bug in next line */
1053 if (j > a->ob_size)
1054 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001055 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1056 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001057 Py_INCREF(a);
1058 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059 }
1060 if (j < i)
1061 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001062 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Guido van Rossum9284a572000-03-07 15:53:43 +00001065static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001066string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001067{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001069#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001070 if (PyUnicode_Check(sub_obj))
1071 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001072#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001074 PyErr_SetString(PyExc_TypeError,
1075 "'in <string>' requires string as left operand");
1076 return -1;
1077 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001078 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001079
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001080 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001081}
1082
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001083static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001084string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001085{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001086 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001087 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001089 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001090 return NULL;
1091 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 pchar = a->ob_sval[i];
1093 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001094 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001095 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001096 else {
1097#ifdef COUNT_ALLOCS
1098 one_strings++;
1099#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001100 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001101 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001102 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001103}
1104
Martin v. Löwiscd353062001-05-24 16:56:35 +00001105static PyObject*
1106string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001107{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001109 Py_ssize_t len_a, len_b;
1110 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 PyObject *result;
1112
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001113 /* Make sure both arguments are strings. */
1114 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001115 result = Py_NotImplemented;
1116 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001117 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001118 if (a == b) {
1119 switch (op) {
1120 case Py_EQ:case Py_LE:case Py_GE:
1121 result = Py_True;
1122 goto out;
1123 case Py_NE:case Py_LT:case Py_GT:
1124 result = Py_False;
1125 goto out;
1126 }
1127 }
1128 if (op == Py_EQ) {
1129 /* Supporting Py_NE here as well does not save
1130 much time, since Py_NE is rarely used. */
1131 if (a->ob_size == b->ob_size
1132 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001133 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001134 a->ob_size) == 0)) {
1135 result = Py_True;
1136 } else {
1137 result = Py_False;
1138 }
1139 goto out;
1140 }
1141 len_a = a->ob_size; len_b = b->ob_size;
1142 min_len = (len_a < len_b) ? len_a : len_b;
1143 if (min_len > 0) {
1144 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1145 if (c==0)
1146 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1147 }else
1148 c = 0;
1149 if (c == 0)
1150 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1151 switch (op) {
1152 case Py_LT: c = c < 0; break;
1153 case Py_LE: c = c <= 0; break;
1154 case Py_EQ: assert(0); break; /* unreachable */
1155 case Py_NE: c = c != 0; break;
1156 case Py_GT: c = c > 0; break;
1157 case Py_GE: c = c >= 0; break;
1158 default:
1159 result = Py_NotImplemented;
1160 goto out;
1161 }
1162 result = c ? Py_True : Py_False;
1163 out:
1164 Py_INCREF(result);
1165 return result;
1166}
1167
1168int
1169_PyString_Eq(PyObject *o1, PyObject *o2)
1170{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001171 PyStringObject *a = (PyStringObject*) o1;
1172 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001173 return a->ob_size == b->ob_size
1174 && *a->ob_sval == *b->ob_sval
1175 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001176}
1177
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178static long
Fred Drakeba096332000-07-09 07:04:36 +00001179string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001181 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001182 register unsigned char *p;
1183 register long x;
1184
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001185 if (a->ob_shash != -1)
1186 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001187 len = a->ob_size;
1188 p = (unsigned char *) a->ob_sval;
1189 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001190 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001191 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001192 x ^= a->ob_size;
1193 if (x == -1)
1194 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001195 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001196 return x;
1197}
1198
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001199static PyObject*
1200string_subscript(PyStringObject* self, PyObject* item)
1201{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001202 if (PyIndex_Check(item)) {
1203 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 if (i == -1 && PyErr_Occurred())
1205 return NULL;
1206 if (i < 0)
1207 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001208 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 }
1210 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001211 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 char* source_buf;
1213 char* result_buf;
1214 PyObject* result;
1215
Tim Petersae1d0c92006-03-17 03:29:34 +00001216 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 PyString_GET_SIZE(self),
1218 &start, &stop, &step, &slicelength) < 0) {
1219 return NULL;
1220 }
1221
1222 if (slicelength <= 0) {
1223 return PyString_FromStringAndSize("", 0);
1224 }
1225 else {
1226 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001227 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001228 if (result_buf == NULL)
1229 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230
Tim Petersae1d0c92006-03-17 03:29:34 +00001231 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232 cur += step, i++) {
1233 result_buf[i] = source_buf[cur];
1234 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001235
1236 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001237 slicelength);
1238 PyMem_Free(result_buf);
1239 return result;
1240 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001241 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001242 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001243 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 "string indices must be integers");
1245 return NULL;
1246 }
1247}
1248
Martin v. Löwis18e16552006-02-15 17:27:45 +00001249static Py_ssize_t
1250string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251{
1252 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001253 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001254 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001255 return -1;
1256 }
1257 *ptr = (void *)self->ob_sval;
1258 return self->ob_size;
1259}
1260
Martin v. Löwis18e16552006-02-15 17:27:45 +00001261static Py_ssize_t
1262string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263{
Guido van Rossum045e6881997-09-08 18:30:11 +00001264 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001265 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001266 return -1;
1267}
1268
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269static Py_ssize_t
1270string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271{
1272 if ( lenp )
1273 *lenp = self->ob_size;
1274 return 1;
1275}
1276
Martin v. Löwis18e16552006-02-15 17:27:45 +00001277static Py_ssize_t
1278string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001279{
1280 if ( index != 0 ) {
1281 PyErr_SetString(PyExc_SystemError,
1282 "accessing non-existent string segment");
1283 return -1;
1284 }
1285 *ptr = self->ob_sval;
1286 return self->ob_size;
1287}
1288
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001289static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001290 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001291 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (ssizeargfunc)string_repeat, /*sq_repeat*/
1293 (ssizeargfunc)string_item, /*sq_item*/
1294 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001295 0, /*sq_ass_item*/
1296 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001297 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001298};
1299
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001300static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001301 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001302 (binaryfunc)string_subscript,
1303 0,
1304};
1305
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001306static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001307 (readbufferproc)string_buffer_getreadbuf,
1308 (writebufferproc)string_buffer_getwritebuf,
1309 (segcountproc)string_buffer_getsegcount,
1310 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001311};
1312
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313
1314
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The format string with its 3-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001323
Andrew Dalke525eab32006-05-26 14:00:45 +00001324
/* True when `pattern` (of `length` bytes) occurs in `target` at `offset`.
   The first and last bytes are compared before memcmp checks the rest.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)	\
    (target[offset] == pattern[0] &&				\
     target[offset+length-1] == pattern[length-1] &&		\
     !memcmp(target+offset+1, pattern+1, length-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)

/* Append data[left:right] to `list` as a new string.
   Expects locals `str` and `list` and a label `onError` in the
   enclosing function. */
#define SPLIT_APPEND(data, left, right)				\
    str = PyString_FromStringAndSize((data) + (left),		\
                                     (right) - (left));		\
    if (str == NULL)						\
        goto onError;						\
    if (PyList_Append(list, str)) {				\
        Py_DECREF(str);						\
        goto onError;						\
    }								\
    else							\
        Py_DECREF(str);

/* Like SPLIT_APPEND, but the first MAX_PREALLOC items are stored
   directly into the preallocated slots; later items are appended.
   Additionally expects a local counter `count`, which it increments. */
#define SPLIT_ADD(data, left, right) {				\
    str = PyString_FromStringAndSize((data) + (left),		\
                                     (right) - (left));		\
    if (str == NULL)						\
        goto onError;						\
    if (count < MAX_PREALLOC) {					\
        PyList_SET_ITEM(list, count, str);			\
    } else {							\
        if (PyList_Append(list, str)) {				\
            Py_DECREF(str);					\
            goto onError;					\
        }							\
        else							\
            Py_DECREF(str);					\
    }								\
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count

/* Advance (or, for the R variants, retreat) index `i` over a run of
   whitespace / non-whitespace bytes in `s`. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1381
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001382Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001383split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001384{
Andrew Dalke525eab32006-05-26 14:00:45 +00001385 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001386 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001387 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388
1389 if (list == NULL)
1390 return NULL;
1391
Andrew Dalke02758d62006-05-26 15:21:01 +00001392 i = j = 0;
1393
1394 while (maxsplit-- > 0) {
1395 SKIP_SPACE(s, i, len);
1396 if (i==len) break;
1397 j = i; i++;
1398 SKIP_NONSPACE(s, i, len);
1399 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001401
1402 if (i < len) {
1403 /* Only occurs when maxsplit was reached */
1404 /* Skip any remaining whitespace and copy to end of string */
1405 SKIP_SPACE(s, i, len);
1406 if (i != len)
1407 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001408 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412 Py_DECREF(list);
1413 return NULL;
1414}
1415
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001416Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001417split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001418{
Andrew Dalke525eab32006-05-26 14:00:45 +00001419 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001420 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001421 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001422
1423 if (list == NULL)
1424 return NULL;
1425
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001426 i = j = 0;
1427 while ((j < len) && (maxcount-- > 0)) {
1428 for(; j<len; j++) {
1429 /* I found that using memchr makes no difference */
1430 if (s[j] == ch) {
1431 SPLIT_ADD(s, i, j);
1432 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001433 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001434 }
1435 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001437 if (i <= len) {
1438 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001439 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001440 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001441 return list;
1442
1443 onError:
1444 Py_DECREF(list);
1445 return NULL;
1446}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001448PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449"S.split([sep [,maxsplit]]) -> list of strings\n\
1450\n\
1451Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001452delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001453splits are done. If sep is not specified or is None, any\n\
1454whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455
1456static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001457string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001459 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001460 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001461 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001463#ifdef USE_FAST
1464 Py_ssize_t pos;
1465#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466
Martin v. Löwis9c830762006-04-13 08:37:17 +00001467 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001469 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001470 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001471 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 if (PyString_Check(subobj)) {
1474 sub = PyString_AS_STRING(subobj);
1475 n = PyString_GET_SIZE(subobj);
1476 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001477#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001478 else if (PyUnicode_Check(subobj))
1479 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001480#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1482 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001483
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 if (n == 0) {
1485 PyErr_SetString(PyExc_ValueError, "empty separator");
1486 return NULL;
1487 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001488 else if (n == 1)
1489 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490
Andrew Dalke525eab32006-05-26 14:00:45 +00001491 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 if (list == NULL)
1493 return NULL;
1494
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001495#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001497 while (maxsplit-- > 0) {
1498 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1499 if (pos < 0)
1500 break;
1501 j = i+pos;
1502 SPLIT_ADD(s, i, j);
1503 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001505#else
1506 i = j = 0;
1507 while ((j+n <= len) && (maxsplit-- > 0)) {
1508 for (; j+n <= len; j++) {
1509 if (Py_STRING_MATCH(s, j, sub, n)) {
1510 SPLIT_ADD(s, i, j);
1511 i = j = j + n;
1512 break;
1513 }
1514 }
1515 }
1516#endif
1517 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001518 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 return list;
1520
Andrew Dalke525eab32006-05-26 14:00:45 +00001521 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 Py_DECREF(list);
1523 return NULL;
1524}
1525
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001526PyDoc_STRVAR(partition__doc__,
1527"S.partition(sep) -> (head, sep, tail)\n\
1528\n\
1529Searches for the separator sep in S, and returns the part before it,\n\
1530the separator itself, and the part after it. If the separator is not\n\
1531found, returns S and two empty strings.");
1532
1533static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001534string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001535{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001536 const char *sep;
1537 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001538
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001539 if (PyString_Check(sep_obj)) {
1540 sep = PyString_AS_STRING(sep_obj);
1541 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001542 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001543#ifdef Py_USING_UNICODE
1544 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001545 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001546#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001547 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001548 return NULL;
1549
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001550 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001551 (PyObject*) self,
1552 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1553 sep_obj, sep, sep_len
1554 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001555}
1556
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001557PyDoc_STRVAR(rpartition__doc__,
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001558"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001559\n\
1560Searches for the separator sep in S, starting at the end of S, and returns\n\
1561the part before it, the separator itself, and the part after it. If the\n\
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001562separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001563
1564static PyObject *
1565string_rpartition(PyStringObject *self, PyObject *sep_obj)
1566{
1567 const char *sep;
1568 Py_ssize_t sep_len;
1569
1570 if (PyString_Check(sep_obj)) {
1571 sep = PyString_AS_STRING(sep_obj);
1572 sep_len = PyString_GET_SIZE(sep_obj);
1573 }
1574#ifdef Py_USING_UNICODE
1575 else if (PyUnicode_Check(sep_obj))
1576 return PyUnicode_Partition((PyObject *) self, sep_obj);
1577#endif
1578 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1579 return NULL;
1580
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001581 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001582 (PyObject*) self,
1583 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1584 sep_obj, sep, sep_len
1585 );
1586}
1587
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001588Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001589rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590{
Andrew Dalke525eab32006-05-26 14:00:45 +00001591 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001592 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001593 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001594
1595 if (list == NULL)
1596 return NULL;
1597
Andrew Dalke02758d62006-05-26 15:21:01 +00001598 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001599
Andrew Dalke02758d62006-05-26 15:21:01 +00001600 while (maxsplit-- > 0) {
1601 RSKIP_SPACE(s, i);
1602 if (i<0) break;
1603 j = i; i--;
1604 RSKIP_NONSPACE(s, i);
1605 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001606 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001607 if (i >= 0) {
1608 /* Only occurs when maxsplit was reached */
1609 /* Skip any remaining whitespace and copy to beginning of string */
1610 RSKIP_SPACE(s, i);
1611 if (i >= 0)
1612 SPLIT_ADD(s, 0, i + 1);
1613
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001614 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001615 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001616 if (PyList_Reverse(list) < 0)
1617 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001618 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001619 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001620 Py_DECREF(list);
1621 return NULL;
1622}
1623
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001624Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001625rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001626{
Andrew Dalke525eab32006-05-26 14:00:45 +00001627 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001628 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001629 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001630
1631 if (list == NULL)
1632 return NULL;
1633
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001634 i = j = len - 1;
1635 while ((i >= 0) && (maxcount-- > 0)) {
1636 for (; i >= 0; i--) {
1637 if (s[i] == ch) {
1638 SPLIT_ADD(s, i + 1, j + 1);
1639 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001640 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001641 }
1642 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 }
1644 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001645 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001646 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001647 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001648 if (PyList_Reverse(list) < 0)
1649 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001650 return list;
1651
1652 onError:
1653 Py_DECREF(list);
1654 return NULL;
1655}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001656
1657PyDoc_STRVAR(rsplit__doc__,
1658"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1659\n\
1660Return a list of the words in the string S, using sep as the\n\
1661delimiter string, starting at the end of the string and working\n\
1662to the front. If maxsplit is given, at most maxsplit splits are\n\
1663done. If sep is not specified or is None, any whitespace string\n\
1664is a separator.");
1665
1666static PyObject *
1667string_rsplit(PyStringObject *self, PyObject *args)
1668{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001669 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001670 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001671 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001672 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001673
Martin v. Löwis9c830762006-04-13 08:37:17 +00001674 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001675 return NULL;
1676 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001677 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 if (subobj == Py_None)
1679 return rsplit_whitespace(s, len, maxsplit);
1680 if (PyString_Check(subobj)) {
1681 sub = PyString_AS_STRING(subobj);
1682 n = PyString_GET_SIZE(subobj);
1683 }
1684#ifdef Py_USING_UNICODE
1685 else if (PyUnicode_Check(subobj))
1686 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1687#endif
1688 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1689 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001690
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001691 if (n == 0) {
1692 PyErr_SetString(PyExc_ValueError, "empty separator");
1693 return NULL;
1694 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001695 else if (n == 1)
1696 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001697
Andrew Dalke525eab32006-05-26 14:00:45 +00001698 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001699 if (list == NULL)
1700 return NULL;
1701
1702 j = len;
1703 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001704
1705 while ( (i >= 0) && (maxsplit-- > 0) ) {
1706 for (; i>=0; i--) {
1707 if (Py_STRING_MATCH(s, i, sub, n)) {
1708 SPLIT_ADD(s, i + n, j);
1709 j = i;
1710 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001711 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001712 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001713 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001715 SPLIT_ADD(s, 0, j);
1716 FIX_PREALLOC_SIZE(list);
1717 if (PyList_Reverse(list) < 0)
1718 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001719 return list;
1720
Andrew Dalke525eab32006-05-26 14:00:45 +00001721onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 Py_DECREF(list);
1723 return NULL;
1724}
1725
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001727PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728"S.join(sequence) -> string\n\
1729\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001730Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001731sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732
1733static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001734string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735{
1736 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001737 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001740 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001741 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001742 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001743 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744
Tim Peters19fe14e2001-01-19 03:03:47 +00001745 seq = PySequence_Fast(orig, "");
1746 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001747 return NULL;
1748 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001749
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001750 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001751 if (seqlen == 0) {
1752 Py_DECREF(seq);
1753 return PyString_FromString("");
1754 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001756 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001757 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1758 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001759 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001760 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001761 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001763
Raymond Hettinger674f2412004-08-23 23:23:54 +00001764 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001765 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001766 * Do a pre-pass to figure out the total amount of space we'll
1767 * need (sz), see whether any argument is absurd, and defer to
1768 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001769 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001770 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001771 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001772 item = PySequence_Fast_GET_ITEM(seq, i);
1773 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001774#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001776 /* Defer to Unicode join.
1777 * CAUTION: There's no gurantee that the
1778 * original sequence can be iterated over
1779 * again, so we must pass seq here.
1780 */
1781 PyObject *result;
1782 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001783 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001784 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001785 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001786#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001787 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001788 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001789 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001790 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 Py_DECREF(seq);
1792 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001793 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 sz += PyString_GET_SIZE(item);
1795 if (i != 0)
1796 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001797 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001798 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001799 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001800 Py_DECREF(seq);
1801 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001803 }
1804
1805 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001806 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001807 if (res == NULL) {
1808 Py_DECREF(seq);
1809 return NULL;
1810 }
1811
1812 /* Catenate everything. */
1813 p = PyString_AS_STRING(res);
1814 for (i = 0; i < seqlen; ++i) {
1815 size_t n;
1816 item = PySequence_Fast_GET_ITEM(seq, i);
1817 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001818 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001819 p += n;
1820 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001821 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001822 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001823 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001825
Jeremy Hylton49048292000-07-11 03:28:17 +00001826 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828}
1829
Tim Peters52e155e2001-06-16 05:42:57 +00001830PyObject *
1831_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001832{
Tim Petersa7259592001-06-16 05:11:17 +00001833 assert(sep != NULL && PyString_Check(sep));
1834 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001835 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001836}
1837
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001838Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001839string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001840{
1841 if (*end > len)
1842 *end = len;
1843 else if (*end < 0)
1844 *end += len;
1845 if (*end < 0)
1846 *end = 0;
1847 if (*start < 0)
1848 *start += len;
1849 if (*start < 0)
1850 *start = 0;
1851}
1852
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001853Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001854string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001856 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001857 const char *sub;
1858 Py_ssize_t sub_len;
1859 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001861 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1862 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001863 return -2;
1864 if (PyString_Check(subobj)) {
1865 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001866 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001867 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001868#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001869 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001870 return PyUnicode_Find(
1871 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001872#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001873 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001874 /* XXX - the "expected a character buffer object" is pretty
1875 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876 return -2;
1877
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001878 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001879 return stringlib_find_slice(
1880 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1881 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001882 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001883 return stringlib_rfind_slice(
1884 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1885 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886}
1887
1888
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001889PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890"S.find(sub [,start [,end]]) -> int\n\
1891\n\
1892Return the lowest index in S where substring sub is found,\n\
1893such that sub is contained within s[start,end]. Optional\n\
1894arguments start and end are interpreted as in slice notation.\n\
1895\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001896Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897
1898static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001899string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001901 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902 if (result == -2)
1903 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001904 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905}
1906
1907
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001908PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909"S.index(sub [,start [,end]]) -> int\n\
1910\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912
1913static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001914string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001916 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917 if (result == -2)
1918 return NULL;
1919 if (result == -1) {
1920 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001921 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922 return NULL;
1923 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001924 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925}
1926
1927
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001928PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929"S.rfind(sub [,start [,end]]) -> int\n\
1930\n\
1931Return the highest index in S where substring sub is found,\n\
1932such that sub is contained within s[start,end]. Optional\n\
1933arguments start and end are interpreted as in slice notation.\n\
1934\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001935Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936
1937static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001938string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001940 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941 if (result == -2)
1942 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001943 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944}
1945
1946
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001947PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948"S.rindex(sub [,start [,end]]) -> int\n\
1949\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951
1952static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001953string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001955 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956 if (result == -2)
1957 return NULL;
1958 if (result == -1) {
1959 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001960 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961 return NULL;
1962 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001963 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964}
1965
1966
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001967Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001968do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1969{
1970 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001971 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001972 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001973 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1974 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975
1976 i = 0;
1977 if (striptype != RIGHTSTRIP) {
1978 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1979 i++;
1980 }
1981 }
1982
1983 j = len;
1984 if (striptype != LEFTSTRIP) {
1985 do {
1986 j--;
1987 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1988 j++;
1989 }
1990
1991 if (i == 0 && j == len && PyString_CheckExact(self)) {
1992 Py_INCREF(self);
1993 return (PyObject*)self;
1994 }
1995 else
1996 return PyString_FromStringAndSize(s+i, j-i);
1997}
1998
1999
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002000Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002001do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002{
2003 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002004 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006 i = 0;
2007 if (striptype != RIGHTSTRIP) {
2008 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2009 i++;
2010 }
2011 }
2012
2013 j = len;
2014 if (striptype != LEFTSTRIP) {
2015 do {
2016 j--;
2017 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2018 j++;
2019 }
2020
Tim Peters8fa5dd02001-09-12 02:18:30 +00002021 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022 Py_INCREF(self);
2023 return (PyObject*)self;
2024 }
2025 else
2026 return PyString_FromStringAndSize(s+i, j-i);
2027}
2028
2029
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002030Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002031do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2032{
2033 PyObject *sep = NULL;
2034
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002035 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002036 return NULL;
2037
2038 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002039 if (PyString_Check(sep))
2040 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002041#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002042 else if (PyUnicode_Check(sep)) {
2043 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2044 PyObject *res;
2045 if (uniself==NULL)
2046 return NULL;
2047 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2048 striptype, sep);
2049 Py_DECREF(uniself);
2050 return res;
2051 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002052#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002053 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002054#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002055 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002056#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002057 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002058#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002059 STRIPNAME(striptype));
2060 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002061 }
2062
2063 return do_strip(self, striptype);
2064}
2065
2066
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002067PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002068"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069\n\
2070Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002071whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002072If chars is given and not None, remove characters in chars instead.\n\
2073If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002074
2075static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002076string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002078 if (PyTuple_GET_SIZE(args) == 0)
2079 return do_strip(self, BOTHSTRIP); /* Common case */
2080 else
2081 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082}
2083
2084
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002085PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002086"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002088Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002089If chars is given and not None, remove characters in chars instead.\n\
2090If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091
2092static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002093string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002095 if (PyTuple_GET_SIZE(args) == 0)
2096 return do_strip(self, LEFTSTRIP); /* Common case */
2097 else
2098 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099}
2100
2101
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002102PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002103"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002105Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002106If chars is given and not None, remove characters in chars instead.\n\
2107If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108
2109static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002110string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002112 if (PyTuple_GET_SIZE(args) == 0)
2113 return do_strip(self, RIGHTSTRIP); /* Common case */
2114 else
2115 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116}
2117
2118
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002119PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120"S.lower() -> string\n\
2121\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002122Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002124/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2125#ifndef _tolower
2126#define _tolower tolower
2127#endif
2128
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002130string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002132 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002133 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002134 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002136 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002137 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002139
2140 s = PyString_AS_STRING(newobj);
2141
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002142 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002143
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002144 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002145 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002146 if (isupper(c))
2147 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002149
Anthony Baxtera6286212006-04-11 07:42:36 +00002150 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151}
2152
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002153PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154"S.upper() -> string\n\
2155\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002156Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002158#ifndef _toupper
2159#define _toupper toupper
2160#endif
2161
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002162static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002163string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002165 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002166 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002167 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002169 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002170 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002172
2173 s = PyString_AS_STRING(newobj);
2174
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002175 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002176
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002178 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002179 if (islower(c))
2180 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002182
Anthony Baxtera6286212006-04-11 07:42:36 +00002183 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184}
2185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002186PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187"S.title() -> string\n\
2188\n\
2189Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002190characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191
2192static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002193string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194{
2195 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002196 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002198 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199
Anthony Baxtera6286212006-04-11 07:42:36 +00002200 newobj = PyString_FromStringAndSize(NULL, n);
2201 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002203 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204 for (i = 0; i < n; i++) {
2205 int c = Py_CHARMASK(*s++);
2206 if (islower(c)) {
2207 if (!previous_is_cased)
2208 c = toupper(c);
2209 previous_is_cased = 1;
2210 } else if (isupper(c)) {
2211 if (previous_is_cased)
2212 c = tolower(c);
2213 previous_is_cased = 1;
2214 } else
2215 previous_is_cased = 0;
2216 *s_new++ = c;
2217 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002218 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002219}
2220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002221PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222"S.capitalize() -> string\n\
2223\n\
2224Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002225capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226
2227static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002228string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229{
2230 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002231 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002232 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233
Anthony Baxtera6286212006-04-11 07:42:36 +00002234 newobj = PyString_FromStringAndSize(NULL, n);
2235 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002237 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238 if (0 < n) {
2239 int c = Py_CHARMASK(*s++);
2240 if (islower(c))
2241 *s_new = toupper(c);
2242 else
2243 *s_new = c;
2244 s_new++;
2245 }
2246 for (i = 1; i < n; i++) {
2247 int c = Py_CHARMASK(*s++);
2248 if (isupper(c))
2249 *s_new = tolower(c);
2250 else
2251 *s_new = c;
2252 s_new++;
2253 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002254 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255}
2256
2257
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002258PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259"S.count(sub[, start[, end]]) -> int\n\
2260\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002261Return the number of non-overlapping occurrences of substring sub in\n\
2262string S[start:end]. Optional arguments start and end are interpreted\n\
2263as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264
2265static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002266string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002268 PyObject *sub_obj;
2269 const char *str = PyString_AS_STRING(self), *sub;
2270 Py_ssize_t sub_len;
2271 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002273 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2274 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002276
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002277 if (PyString_Check(sub_obj)) {
2278 sub = PyString_AS_STRING(sub_obj);
2279 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002280 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002281#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002282 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002283 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002284 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002285 if (count == -1)
2286 return NULL;
2287 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002288 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002289 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002290#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002291 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002292 return NULL;
2293
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002294 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002295
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002296 return PyInt_FromSsize_t(
2297 stringlib_count(str + start, end - start, sub, sub_len)
2298 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299}
2300
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002301PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302"S.swapcase() -> string\n\
2303\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002305converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306
2307static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002308string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309{
2310 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002311 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002312 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313
Anthony Baxtera6286212006-04-11 07:42:36 +00002314 newobj = PyString_FromStringAndSize(NULL, n);
2315 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002317 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318 for (i = 0; i < n; i++) {
2319 int c = Py_CHARMASK(*s++);
2320 if (islower(c)) {
2321 *s_new = toupper(c);
2322 }
2323 else if (isupper(c)) {
2324 *s_new = tolower(c);
2325 }
2326 else
2327 *s_new = c;
2328 s_new++;
2329 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002330 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331}
2332
2333
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002334PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335"S.translate(table [,deletechars]) -> string\n\
2336\n\
2337Return a copy of the string S, where all characters occurring\n\
2338in the optional argument deletechars are removed, and the\n\
2339remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002340translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002341
2342static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002343string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 register char *input, *output;
2346 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002347 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002348 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002350 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 PyObject *result;
2352 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002355 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358
2359 if (PyString_Check(tableobj)) {
2360 table1 = PyString_AS_STRING(tableobj);
2361 tablen = PyString_GET_SIZE(tableobj);
2362 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002363#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002364 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002365 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002366 parameter; instead a mapping to None will cause characters
2367 to be deleted. */
2368 if (delobj != NULL) {
2369 PyErr_SetString(PyExc_TypeError,
2370 "deletions are implemented differently for unicode");
2371 return NULL;
2372 }
2373 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2374 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002375#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378
Martin v. Löwis00b61272002-12-12 20:03:19 +00002379 if (tablen != 256) {
2380 PyErr_SetString(PyExc_ValueError,
2381 "translation table must be 256 characters long");
2382 return NULL;
2383 }
2384
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385 if (delobj != NULL) {
2386 if (PyString_Check(delobj)) {
2387 del_table = PyString_AS_STRING(delobj);
2388 dellen = PyString_GET_SIZE(delobj);
2389 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002390#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 else if (PyUnicode_Check(delobj)) {
2392 PyErr_SetString(PyExc_TypeError,
2393 "deletions are implemented differently for unicode");
2394 return NULL;
2395 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002396#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2398 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002399 }
2400 else {
2401 del_table = NULL;
2402 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403 }
2404
2405 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002406 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407 result = PyString_FromStringAndSize((char *)NULL, inlen);
2408 if (result == NULL)
2409 return NULL;
2410 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002411 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412
2413 if (dellen == 0) {
2414 /* If no deletions are required, use faster code */
2415 for (i = inlen; --i >= 0; ) {
2416 c = Py_CHARMASK(*input++);
2417 if (Py_CHARMASK((*output++ = table[c])) != c)
2418 changed = 1;
2419 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002420 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421 return result;
2422 Py_DECREF(result);
2423 Py_INCREF(input_obj);
2424 return input_obj;
2425 }
2426
2427 for (i = 0; i < 256; i++)
2428 trans_table[i] = Py_CHARMASK(table[i]);
2429
2430 for (i = 0; i < dellen; i++)
2431 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2432
2433 for (i = inlen; --i >= 0; ) {
2434 c = Py_CHARMASK(*input++);
2435 if (trans_table[c] != -1)
2436 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2437 continue;
2438 changed = 1;
2439 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002440 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441 Py_DECREF(result);
2442 Py_INCREF(input_obj);
2443 return input_obj;
2444 }
2445 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002446 if (inlen > 0)
2447 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002448 return result;
2449}
2450
2451
/* Search directions accepted by findstring() and countstring().
   REVERSE is parenthesized so the macro always expands to a single
   operand, whatever expression it is used in. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first occurrence of the byte `c` in the first
   `target_len` bytes of `target`, or NULL if there is none.  Thin wrapper
   around memchr(); every macro argument is parenthesized so arbitrary
   expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2459
2460/* String ops must return a string. */
2461/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002462Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002463return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002464{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002465 if (PyString_CheckExact(self)) {
2466 Py_INCREF(self);
2467 return self;
2468 }
2469 return (PyStringObject *)PyString_FromStringAndSize(
2470 PyString_AS_STRING(self),
2471 PyString_GET_SIZE(self));
2472}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002473
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002474Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002475countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002476{
2477 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002478 const char *start=target;
2479 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002481 while ( (start=findchar(start, end-start, c)) != NULL ) {
2482 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002483 if (count >= maxcount)
2484 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002485 start += 1;
2486 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002487 return count;
2488}
2489
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002490Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002491findstring(const char *target, Py_ssize_t target_len,
2492 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002493 Py_ssize_t start,
2494 Py_ssize_t end,
2495 int direction)
2496{
2497 if (start < 0) {
2498 start += target_len;
2499 if (start < 0)
2500 start = 0;
2501 }
2502 if (end > target_len) {
2503 end = target_len;
2504 } else if (end < 0) {
2505 end += target_len;
2506 if (end < 0)
2507 end = 0;
2508 }
2509
2510 /* zero-length substrings always match at the first attempt */
2511 if (pattern_len == 0)
2512 return (direction > 0) ? start : end;
2513
2514 end -= pattern_len;
2515
2516 if (direction < 0) {
2517 for (; end >= start; end--)
2518 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2519 return end;
2520 } else {
2521 for (; start <= end; start++)
2522 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2523 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002524 }
2525 return -1;
2526}
2527
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002528Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002529countstring(const char *target, Py_ssize_t target_len,
2530 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002531 Py_ssize_t start,
2532 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002533 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002534{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002535 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002536
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002537 if (start < 0) {
2538 start += target_len;
2539 if (start < 0)
2540 start = 0;
2541 }
2542 if (end > target_len) {
2543 end = target_len;
2544 } else if (end < 0) {
2545 end += target_len;
2546 if (end < 0)
2547 end = 0;
2548 }
2549
2550 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002551 if (pattern_len == 0 || maxcount == 0) {
2552 if (target_len+1 < maxcount)
2553 return target_len+1;
2554 return maxcount;
2555 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002556
2557 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002558 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002559 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002560 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2561 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002562 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 end -= pattern_len-1;
2564 }
2565 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002566 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002567 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2568 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002569 if (--maxcount <= 0)
2570 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002571 start += pattern_len-1;
2572 }
2573 }
2574 return count;
2575}
2576
2577
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002578/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002579
2580/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002581Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002582replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002583 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002584 Py_ssize_t maxcount)
2585{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002586 char *self_s, *result_s;
2587 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002588 Py_ssize_t count, i, product;
2589 PyStringObject *result;
2590
2591 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002592
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002593 /* 1 at the end plus 1 after every character */
2594 count = self_len+1;
2595 if (maxcount < count)
2596 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002597
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002598 /* Check for overflow */
2599 /* result_len = count * to_len + self_len; */
2600 product = count * to_len;
2601 if (product / to_len != count) {
2602 PyErr_SetString(PyExc_OverflowError,
2603 "replace string is too long");
2604 return NULL;
2605 }
2606 result_len = product + self_len;
2607 if (result_len < 0) {
2608 PyErr_SetString(PyExc_OverflowError,
2609 "replace string is too long");
2610 return NULL;
2611 }
2612
2613 if (! (result = (PyStringObject *)
2614 PyString_FromStringAndSize(NULL, result_len)) )
2615 return NULL;
2616
2617 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002618 result_s = PyString_AS_STRING(result);
2619
2620 /* TODO: special case single character, which doesn't need memcpy */
2621
2622 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002623 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002624 result_s += to_len;
2625 count -= 1;
2626
2627 for (i=0; i<count; i++) {
2628 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002629 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002630 result_s += to_len;
2631 }
2632
2633 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002634 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002635
2636 return result;
2637}
2638
2639/* Special case for deleting a single character */
2640/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002641Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002642replace_delete_single_character(PyStringObject *self,
2643 char from_c, Py_ssize_t maxcount)
2644{
2645 char *self_s, *result_s;
2646 char *start, *next, *end;
2647 Py_ssize_t self_len, result_len;
2648 Py_ssize_t count;
2649 PyStringObject *result;
2650
2651 self_len = PyString_GET_SIZE(self);
2652 self_s = PyString_AS_STRING(self);
2653
Andrew Dalke51324072006-05-26 20:25:22 +00002654 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002655 if (count == 0) {
2656 return return_self(self);
2657 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002658
2659 result_len = self_len - count; /* from_len == 1 */
2660 assert(result_len>=0);
2661
2662 if ( (result = (PyStringObject *)
2663 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2664 return NULL;
2665 result_s = PyString_AS_STRING(result);
2666
2667 start = self_s;
2668 end = self_s + self_len;
2669 while (count-- > 0) {
2670 next = findchar(start, end-start, from_c);
2671 if (next == NULL)
2672 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002673 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002674 result_s += (next-start);
2675 start = next+1;
2676 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002677 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002678
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002679 return result;
2680}
2681
2682/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2683
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002684Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002685replace_delete_substring(PyStringObject *self,
2686 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002687 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002688 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002689 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002690 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002691 Py_ssize_t count, offset;
2692 PyStringObject *result;
2693
2694 self_len = PyString_GET_SIZE(self);
2695 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002696
2697 count = countstring(self_s, self_len,
2698 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002699 0, self_len, 1,
2700 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002701
2702 if (count == 0) {
2703 /* no matches */
2704 return return_self(self);
2705 }
2706
2707 result_len = self_len - (count * from_len);
2708 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002709
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002710 if ( (result = (PyStringObject *)
2711 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2712 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002713
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002714 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002715
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002716 start = self_s;
2717 end = self_s + self_len;
2718 while (count-- > 0) {
2719 offset = findstring(start, end-start,
2720 from_s, from_len,
2721 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002722 if (offset == -1)
2723 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002724 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002725
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002726 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002727
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002728 result_s += (next-start);
2729 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002730 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002731 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002732 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002733}
2734
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002735/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002736Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002737replace_single_character_in_place(PyStringObject *self,
2738 char from_c, char to_c,
2739 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002740{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002741 char *self_s, *result_s, *start, *end, *next;
2742 Py_ssize_t self_len;
2743 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002744
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002745 /* The result string will be the same size */
2746 self_s = PyString_AS_STRING(self);
2747 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002748
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002749 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002750
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002751 if (next == NULL) {
2752 /* No matches; return the original string */
2753 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002754 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002755
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002756 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002757 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 if (result == NULL)
2759 return NULL;
2760 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002761 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002762
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002763 /* change everything in-place, starting with this one */
2764 start = result_s + (next-self_s);
2765 *start = to_c;
2766 start++;
2767 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002768
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769 while (--maxcount > 0) {
2770 next = findchar(start, end-start, from_c);
2771 if (next == NULL)
2772 break;
2773 *next = to_c;
2774 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002775 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002776
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002777 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002778}
2779
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002780/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002781Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002782replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002783 const char *from_s, Py_ssize_t from_len,
2784 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785 Py_ssize_t maxcount)
2786{
2787 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002788 char *self_s;
2789 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002790 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002791
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002792 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002793
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002794 self_s = PyString_AS_STRING(self);
2795 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002796
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 offset = findstring(self_s, self_len,
2798 from_s, from_len,
2799 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 if (offset == -1) {
2801 /* No matches; return the original string */
2802 return return_self(self);
2803 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002804
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002806 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002807 if (result == NULL)
2808 return NULL;
2809 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002810 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002811
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002812 /* change everything in-place, starting with this one */
2813 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002814 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815 start += from_len;
2816 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002817
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002818 while ( --maxcount > 0) {
2819 offset = findstring(start, end-start,
2820 from_s, from_len,
2821 0, end-start, FORWARD);
2822 if (offset==-1)
2823 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002824 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002825 start += offset+from_len;
2826 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002827
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002828 return result;
2829}
2830
2831/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002832Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002833replace_single_character(PyStringObject *self,
2834 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002835 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 Py_ssize_t maxcount)
2837{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002838 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002840 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002841 Py_ssize_t count, product;
2842 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002843
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002844 self_s = PyString_AS_STRING(self);
2845 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002846
Andrew Dalke51324072006-05-26 20:25:22 +00002847 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002848 if (count == 0) {
2849 /* no matches, return unchanged */
2850 return return_self(self);
2851 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002852
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002853 /* use the difference between current and new, hence the "-1" */
2854 /* result_len = self_len + count * (to_len-1) */
2855 product = count * (to_len-1);
2856 if (product / (to_len-1) != count) {
2857 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2858 return NULL;
2859 }
2860 result_len = self_len + product;
2861 if (result_len < 0) {
2862 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2863 return NULL;
2864 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002865
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002866 if ( (result = (PyStringObject *)
2867 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2868 return NULL;
2869 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002870
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002871 start = self_s;
2872 end = self_s + self_len;
2873 while (count-- > 0) {
2874 next = findchar(start, end-start, from_c);
2875 if (next == NULL)
2876 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002877
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002878 if (next == start) {
2879 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002880 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881 result_s += to_len;
2882 start += 1;
2883 } else {
2884 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002885 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002886 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002887 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002888 result_s += to_len;
2889 start = next+1;
2890 }
2891 }
2892 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002893 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002894
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895 return result;
2896}
2897
2898/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002899Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002900replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002901 const char *from_s, Py_ssize_t from_len,
2902 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002904 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002905 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002906 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002907 Py_ssize_t count, offset, product;
2908 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002909
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002910 self_s = PyString_AS_STRING(self);
2911 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002912
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002913 count = countstring(self_s, self_len,
2914 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002915 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916 if (count == 0) {
2917 /* no matches, return unchanged */
2918 return return_self(self);
2919 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002920
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002921 /* Check for overflow */
2922 /* result_len = self_len + count * (to_len-from_len) */
2923 product = count * (to_len-from_len);
2924 if (product / (to_len-from_len) != count) {
2925 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2926 return NULL;
2927 }
2928 result_len = self_len + product;
2929 if (result_len < 0) {
2930 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2931 return NULL;
2932 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002933
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002934 if ( (result = (PyStringObject *)
2935 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2936 return NULL;
2937 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002938
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002939 start = self_s;
2940 end = self_s + self_len;
2941 while (count-- > 0) {
2942 offset = findstring(start, end-start,
2943 from_s, from_len,
2944 0, end-start, FORWARD);
2945 if (offset == -1)
2946 break;
2947 next = start+offset;
2948 if (next == start) {
2949 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002950 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002951 result_s += to_len;
2952 start += from_len;
2953 } else {
2954 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002955 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002956 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002957 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002958 result_s += to_len;
2959 start = next+from_len;
2960 }
2961 }
2962 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002963 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002964
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002965 return result;
2966}
2967
2968
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002969Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002970replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002971 const char *from_s, Py_ssize_t from_len,
2972 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973 Py_ssize_t maxcount)
2974{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002975 if (maxcount < 0) {
2976 maxcount = PY_SSIZE_T_MAX;
2977 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2978 /* nothing to do; return the original string */
2979 return return_self(self);
2980 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002981
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002982 if (maxcount == 0 ||
2983 (from_len == 0 && to_len == 0)) {
2984 /* nothing to do; return the original string */
2985 return return_self(self);
2986 }
2987
2988 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002989
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002990 if (from_len == 0) {
2991 /* insert the 'to' string everywhere. */
2992 /* >>> "Python".replace("", ".") */
2993 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002994 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002995 }
2996
2997 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2998 /* point for an empty self string to generate a non-empty string */
2999 /* Special case so the remaining code always gets a non-empty string */
3000 if (PyString_GET_SIZE(self) == 0) {
3001 return return_self(self);
3002 }
3003
3004 if (to_len == 0) {
3005 /* delete all occurances of 'from' string */
3006 if (from_len == 1) {
3007 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003008 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003009 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003010 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003011 }
3012 }
3013
3014 /* Handle special case where both strings have the same length */
3015
3016 if (from_len == to_len) {
3017 if (from_len == 1) {
3018 return replace_single_character_in_place(
3019 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003020 from_s[0],
3021 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003022 maxcount);
3023 } else {
3024 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003025 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003026 }
3027 }
3028
3029 /* Otherwise use the more generic algorithms */
3030 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003031 return replace_single_character(self, from_s[0],
3032 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003033 } else {
3034 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003035 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003036 }
3037}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003038
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003039PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003040"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003041\n\
3042Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003043old replaced by new. If the optional argument count is\n\
3044given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003045
3046static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003047string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003049 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003050 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003051 const char *from_s, *to_s;
3052 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003053
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003054 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003055 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003056
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003058 from_s = PyString_AS_STRING(from);
3059 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003060 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003061#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003062 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003063 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003065#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003066 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 return NULL;
3068
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003069 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003070 to_s = PyString_AS_STRING(to);
3071 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003072 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003073#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003074 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003075 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003076 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003077#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003078 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003079 return NULL;
3080
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003081 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003082 from_s, from_len,
3083 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003084}
3085
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003086/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003088/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003089 * against substr, using the start and end arguments. Returns
3090 * -1 on error, 0 if not found and 1 if found.
3091 */
3092Py_LOCAL(int)
3093_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3094 Py_ssize_t end, int direction)
3095{
3096 Py_ssize_t len = PyString_GET_SIZE(self);
3097 Py_ssize_t slen;
3098 const char* sub;
3099 const char* str;
3100
3101 if (PyString_Check(substr)) {
3102 sub = PyString_AS_STRING(substr);
3103 slen = PyString_GET_SIZE(substr);
3104 }
3105#ifdef Py_USING_UNICODE
3106 else if (PyUnicode_Check(substr))
3107 return PyUnicode_Tailmatch((PyObject *)self,
3108 substr, start, end, direction);
3109#endif
3110 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3111 return -1;
3112 str = PyString_AS_STRING(self);
3113
3114 string_adjust_indices(&start, &end, len);
3115
3116 if (direction < 0) {
3117 /* startswith */
3118 if (start+slen > len)
3119 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003120 } else {
3121 /* endswith */
3122 if (end-start < slen || start > len)
3123 return 0;
3124
3125 if (end-slen > start)
3126 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003127 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003128 if (end-start >= slen)
3129 return ! memcmp(str+start, sub, slen);
3130 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003131}
3132
3133
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003134PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003135"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003136\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003137Return True if S starts with the specified prefix, False otherwise.\n\
3138With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003139With optional end, stop comparing S at that position.\n\
3140prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003141
3142static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003143string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003145 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003146 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003148 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003149
Guido van Rossumc6821402000-05-08 14:08:05 +00003150 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3151 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003152 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003153 if (PyTuple_Check(subobj)) {
3154 Py_ssize_t i;
3155 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3156 result = _string_tailmatch(self,
3157 PyTuple_GET_ITEM(subobj, i),
3158 start, end, -1);
3159 if (result == -1)
3160 return NULL;
3161 else if (result) {
3162 Py_RETURN_TRUE;
3163 }
3164 }
3165 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003166 }
Georg Brandl24250812006-06-09 18:45:48 +00003167 result = _string_tailmatch(self, subobj, start, end, -1);
3168 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003169 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003170 else
Georg Brandl24250812006-06-09 18:45:48 +00003171 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172}
3173
3174
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003175PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003176"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003177\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003178Return True if S ends with the specified suffix, False otherwise.\n\
3179With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003180With optional end, stop comparing S at that position.\n\
3181suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003182
3183static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003184string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003185{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003186 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003187 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003189 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003190
Guido van Rossumc6821402000-05-08 14:08:05 +00003191 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3192 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003193 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003194 if (PyTuple_Check(subobj)) {
3195 Py_ssize_t i;
3196 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3197 result = _string_tailmatch(self,
3198 PyTuple_GET_ITEM(subobj, i),
3199 start, end, +1);
3200 if (result == -1)
3201 return NULL;
3202 else if (result) {
3203 Py_RETURN_TRUE;
3204 }
3205 }
3206 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003207 }
Georg Brandl24250812006-06-09 18:45:48 +00003208 result = _string_tailmatch(self, subobj, start, end, +1);
3209 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003210 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003211 else
Georg Brandl24250812006-06-09 18:45:48 +00003212 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213}
3214
3215
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003216PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003217"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003218\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003219Encodes S using the codec registered for encoding. encoding defaults\n\
3220to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003221handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003222a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3223'xmlcharrefreplace' as well as any other name registered with\n\
3224codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003225
3226static PyObject *
3227string_encode(PyStringObject *self, PyObject *args)
3228{
3229 char *encoding = NULL;
3230 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003231 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003232
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003233 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3234 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003235 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003236 if (v == NULL)
3237 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003238 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3239 PyErr_Format(PyExc_TypeError,
3240 "encoder did not return a string/unicode object "
3241 "(type=%.400s)",
3242 v->ob_type->tp_name);
3243 Py_DECREF(v);
3244 return NULL;
3245 }
3246 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003247
3248 onError:
3249 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003250}
3251
3252
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003253PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003254"S.decode([encoding[,errors]]) -> object\n\
3255\n\
3256Decodes S using the codec registered for encoding. encoding defaults\n\
3257to the default encoding. errors may be given to set a different error\n\
3258handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003259a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3260as well as any other name registerd with codecs.register_error that is\n\
3261able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003262
3263static PyObject *
3264string_decode(PyStringObject *self, PyObject *args)
3265{
3266 char *encoding = NULL;
3267 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003268 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003269
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003270 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3271 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003272 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003273 if (v == NULL)
3274 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003275 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3276 PyErr_Format(PyExc_TypeError,
3277 "decoder did not return a string/unicode object "
3278 "(type=%.400s)",
3279 v->ob_type->tp_name);
3280 Py_DECREF(v);
3281 return NULL;
3282 }
3283 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003284
3285 onError:
3286 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003287}
3288
3289
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003290PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003291"S.expandtabs([tabsize]) -> string\n\
3292\n\
3293Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003294If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003295
3296static PyObject*
3297string_expandtabs(PyStringObject *self, PyObject *args)
3298{
3299 const char *e, *p;
3300 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003301 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003302 PyObject *u;
3303 int tabsize = 8;
3304
3305 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3306 return NULL;
3307
Thomas Wouters7e474022000-07-16 12:04:32 +00003308 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003309 i = j = 0;
3310 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3311 for (p = PyString_AS_STRING(self); p < e; p++)
3312 if (*p == '\t') {
3313 if (tabsize > 0)
3314 j += tabsize - (j % tabsize);
3315 }
3316 else {
3317 j++;
3318 if (*p == '\n' || *p == '\r') {
3319 i += j;
3320 j = 0;
3321 }
3322 }
3323
3324 /* Second pass: create output string and fill it */
3325 u = PyString_FromStringAndSize(NULL, i + j);
3326 if (!u)
3327 return NULL;
3328
3329 j = 0;
3330 q = PyString_AS_STRING(u);
3331
3332 for (p = PyString_AS_STRING(self); p < e; p++)
3333 if (*p == '\t') {
3334 if (tabsize > 0) {
3335 i = tabsize - (j % tabsize);
3336 j += i;
3337 while (i--)
3338 *q++ = ' ';
3339 }
3340 }
3341 else {
3342 j++;
3343 *q++ = *p;
3344 if (*p == '\n' || *p == '\r')
3345 j = 0;
3346 }
3347
3348 return u;
3349}
3350
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003351Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003352pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003353{
3354 PyObject *u;
3355
3356 if (left < 0)
3357 left = 0;
3358 if (right < 0)
3359 right = 0;
3360
Tim Peters8fa5dd02001-09-12 02:18:30 +00003361 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003362 Py_INCREF(self);
3363 return (PyObject *)self;
3364 }
3365
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003366 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003367 left + PyString_GET_SIZE(self) + right);
3368 if (u) {
3369 if (left)
3370 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003371 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003372 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003373 PyString_GET_SIZE(self));
3374 if (right)
3375 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3376 fill, right);
3377 }
3378
3379 return u;
3380}
3381
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003382PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003383"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003384"\n"
3385"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003386"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387
3388static PyObject *
3389string_ljust(PyStringObject *self, PyObject *args)
3390{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003391 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003392 char fillchar = ' ';
3393
Thomas Wouters4abb3662006-04-19 14:50:15 +00003394 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395 return NULL;
3396
Tim Peters8fa5dd02001-09-12 02:18:30 +00003397 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398 Py_INCREF(self);
3399 return (PyObject*) self;
3400 }
3401
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003402 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003403}
3404
3405
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003406PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003407"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003408"\n"
3409"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003410"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411
3412static PyObject *
3413string_rjust(PyStringObject *self, PyObject *args)
3414{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003415 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003416 char fillchar = ' ';
3417
Thomas Wouters4abb3662006-04-19 14:50:15 +00003418 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003419 return NULL;
3420
Tim Peters8fa5dd02001-09-12 02:18:30 +00003421 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422 Py_INCREF(self);
3423 return (PyObject*) self;
3424 }
3425
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003426 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003427}
3428
3429
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003430PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003431"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003432"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003433"Return S centered in a string of length width. Padding is\n"
3434"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435
3436static PyObject *
3437string_center(PyStringObject *self, PyObject *args)
3438{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003439 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003440 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003441 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003442
Thomas Wouters4abb3662006-04-19 14:50:15 +00003443 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444 return NULL;
3445
Tim Peters8fa5dd02001-09-12 02:18:30 +00003446 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003447 Py_INCREF(self);
3448 return (PyObject*) self;
3449 }
3450
3451 marg = width - PyString_GET_SIZE(self);
3452 left = marg / 2 + (marg & width & 1);
3453
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003454 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003455}
3456
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003457PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003458"S.zfill(width) -> string\n"
3459"\n"
3460"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003461"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003462
3463static PyObject *
3464string_zfill(PyStringObject *self, PyObject *args)
3465{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003466 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003467 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003468 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003469 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003470
Thomas Wouters4abb3662006-04-19 14:50:15 +00003471 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003472 return NULL;
3473
3474 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003475 if (PyString_CheckExact(self)) {
3476 Py_INCREF(self);
3477 return (PyObject*) self;
3478 }
3479 else
3480 return PyString_FromStringAndSize(
3481 PyString_AS_STRING(self),
3482 PyString_GET_SIZE(self)
3483 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003484 }
3485
3486 fill = width - PyString_GET_SIZE(self);
3487
3488 s = pad(self, fill, 0, '0');
3489
3490 if (s == NULL)
3491 return NULL;
3492
3493 p = PyString_AS_STRING(s);
3494 if (p[fill] == '+' || p[fill] == '-') {
3495 /* move sign to beginning of string */
3496 p[0] = p[fill];
3497 p[fill] = '0';
3498 }
3499
3500 return (PyObject*) s;
3501}
3502
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003503PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003504"S.isspace() -> bool\n\
3505\n\
3506Return True if all characters in S are whitespace\n\
3507and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003508
3509static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003510string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003511{
Fred Drakeba096332000-07-09 07:04:36 +00003512 register const unsigned char *p
3513 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003514 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003515
Guido van Rossum4c08d552000-03-10 22:55:18 +00003516 /* Shortcut for single character strings */
3517 if (PyString_GET_SIZE(self) == 1 &&
3518 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003519 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003520
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003521 /* Special case for empty strings */
3522 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003523 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003524
Guido van Rossum4c08d552000-03-10 22:55:18 +00003525 e = p + PyString_GET_SIZE(self);
3526 for (; p < e; p++) {
3527 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003528 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003530 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003531}
3532
3533
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003534PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003535"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003536\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003537Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003538and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003539
3540static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003541string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003542{
Fred Drakeba096332000-07-09 07:04:36 +00003543 register const unsigned char *p
3544 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003545 register const unsigned char *e;
3546
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003547 /* Shortcut for single character strings */
3548 if (PyString_GET_SIZE(self) == 1 &&
3549 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003551
3552 /* Special case for empty strings */
3553 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003554 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003555
3556 e = p + PyString_GET_SIZE(self);
3557 for (; p < e; p++) {
3558 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003559 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003560 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003561 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003562}
3563
3564
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003565PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003566"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003567\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003568Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003569and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570
3571static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003572string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573{
Fred Drakeba096332000-07-09 07:04:36 +00003574 register const unsigned char *p
3575 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576 register const unsigned char *e;
3577
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003578 /* Shortcut for single character strings */
3579 if (PyString_GET_SIZE(self) == 1 &&
3580 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582
3583 /* Special case for empty strings */
3584 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003586
3587 e = p + PyString_GET_SIZE(self);
3588 for (; p < e; p++) {
3589 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003590 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003591 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003592 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593}
3594
3595
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003596PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003597"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003599Return True if all characters in S are digits\n\
3600and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003601
3602static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003603string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003604{
Fred Drakeba096332000-07-09 07:04:36 +00003605 register const unsigned char *p
3606 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003607 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003608
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609 /* Shortcut for single character strings */
3610 if (PyString_GET_SIZE(self) == 1 &&
3611 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003613
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003614 /* Special case for empty strings */
3615 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003617
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618 e = p + PyString_GET_SIZE(self);
3619 for (; p < e; p++) {
3620 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003623 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624}
3625
3626
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003627PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003628"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003631at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632
3633static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003634string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635{
Fred Drakeba096332000-07-09 07:04:36 +00003636 register const unsigned char *p
3637 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003638 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639 int cased;
3640
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641 /* Shortcut for single character strings */
3642 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003645 /* Special case for empty strings */
3646 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003648
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649 e = p + PyString_GET_SIZE(self);
3650 cased = 0;
3651 for (; p < e; p++) {
3652 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654 else if (!cased && islower(*p))
3655 cased = 1;
3656 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658}
3659
3660
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003661PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003662"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003664Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003665at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666
3667static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003668string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669{
Fred Drakeba096332000-07-09 07:04:36 +00003670 register const unsigned char *p
3671 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003672 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673 int cased;
3674
Guido van Rossum4c08d552000-03-10 22:55:18 +00003675 /* Shortcut for single character strings */
3676 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003679 /* Special case for empty strings */
3680 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003682
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683 e = p + PyString_GET_SIZE(self);
3684 cased = 0;
3685 for (; p < e; p++) {
3686 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003687 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688 else if (!cased && isupper(*p))
3689 cased = 1;
3690 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003691 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692}
3693
3694
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003695PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003696"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003698Return True if S is a titlecased string and there is at least one\n\
3699character in S, i.e. uppercase characters may only follow uncased\n\
3700characters and lowercase characters only cased ones. Return False\n\
3701otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702
3703static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003704string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705{
Fred Drakeba096332000-07-09 07:04:36 +00003706 register const unsigned char *p
3707 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003708 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709 int cased, previous_is_cased;
3710
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711 /* Shortcut for single character strings */
3712 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003713 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003715 /* Special case for empty strings */
3716 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003717 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003718
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719 e = p + PyString_GET_SIZE(self);
3720 cased = 0;
3721 previous_is_cased = 0;
3722 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003723 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724
3725 if (isupper(ch)) {
3726 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728 previous_is_cased = 1;
3729 cased = 1;
3730 }
3731 else if (islower(ch)) {
3732 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003733 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734 previous_is_cased = 1;
3735 cased = 1;
3736 }
3737 else
3738 previous_is_cased = 0;
3739 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003740 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741}
3742
3743
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003744PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003745"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746\n\
3747Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003748Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003749is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751static PyObject*
3752string_splitlines(PyStringObject *self, PyObject *args)
3753{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003754 register Py_ssize_t i;
3755 register Py_ssize_t j;
3756 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003757 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003758 PyObject *list;
3759 PyObject *str;
3760 char *data;
3761
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003762 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763 return NULL;
3764
3765 data = PyString_AS_STRING(self);
3766 len = PyString_GET_SIZE(self);
3767
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003768 /* This does not use the preallocated list because splitlines is
3769 usually run with hundreds of newlines. The overhead of
3770 switching between PyList_SET_ITEM and append causes about a
3771 2-3% slowdown for that common case. A smarter implementation
3772 could move the if check out, so the SET_ITEMs are done first
3773 and the appends only done when the prealloc buffer is full.
3774 That's too much work for little gain.*/
3775
Guido van Rossum4c08d552000-03-10 22:55:18 +00003776 list = PyList_New(0);
3777 if (!list)
3778 goto onError;
3779
3780 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003781 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003782
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783 /* Find a line and append it */
3784 while (i < len && data[i] != '\n' && data[i] != '\r')
3785 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786
3787 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003788 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789 if (i < len) {
3790 if (data[i] == '\r' && i + 1 < len &&
3791 data[i+1] == '\n')
3792 i += 2;
3793 else
3794 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003795 if (keepends)
3796 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003798 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799 j = i;
3800 }
3801 if (j < len) {
3802 SPLIT_APPEND(data, j, len);
3803 }
3804
3805 return list;
3806
3807 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003808 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809 return NULL;
3810}
3811
3812#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003813#undef SPLIT_ADD
3814#undef MAX_PREALLOC
3815#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003817static PyObject *
3818string_getnewargs(PyStringObject *v)
3819{
3820 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3821}
3822
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003823
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003824static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003825string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003826 /* Counterparts of the obsolete stropmodule functions; except
3827 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003828 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3829 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003830 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003831 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3832 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003833 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3834 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3835 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3836 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3837 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3838 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3839 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003840 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3841 capitalize__doc__},
3842 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3843 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3844 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003845 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003846 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3847 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3848 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3849 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3850 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3851 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3852 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003853 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3854 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003855 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3856 startswith__doc__},
3857 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3858 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3859 swapcase__doc__},
3860 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3861 translate__doc__},
3862 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3863 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3864 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3865 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3866 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3867 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3868 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3869 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3870 expandtabs__doc__},
3871 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3872 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003873 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003874 {NULL, NULL} /* sentinel */
3875};
3876
Jeremy Hylton938ace62002-07-17 16:30:39 +00003877static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003878str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3879
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003880static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003881string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003882{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003883 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003884 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003885
Guido van Rossumae960af2001-08-30 03:11:59 +00003886 if (type != &PyString_Type)
3887 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003888 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3889 return NULL;
3890 if (x == NULL)
3891 return PyString_FromString("");
3892 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003893}
3894
Guido van Rossumae960af2001-08-30 03:11:59 +00003895static PyObject *
3896str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3897{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003898 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003899 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003900
3901 assert(PyType_IsSubtype(type, &PyString_Type));
3902 tmp = string_new(&PyString_Type, args, kwds);
3903 if (tmp == NULL)
3904 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003905 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003906 n = PyString_GET_SIZE(tmp);
3907 pnew = type->tp_alloc(type, n);
3908 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003909 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003910 ((PyStringObject *)pnew)->ob_shash =
3911 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003912 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003913 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003914 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003915 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003916}
3917
Guido van Rossumcacfc072002-05-24 19:01:59 +00003918static PyObject *
3919basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3920{
3921 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003922 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003923 return NULL;
3924}
3925
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003926static PyObject *
3927string_mod(PyObject *v, PyObject *w)
3928{
3929 if (!PyString_Check(v)) {
3930 Py_INCREF(Py_NotImplemented);
3931 return Py_NotImplemented;
3932 }
3933 return PyString_Format(v, w);
3934}
3935
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003936PyDoc_STRVAR(basestring_doc,
3937"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003938
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003939static PyNumberMethods string_as_number = {
3940 0, /*nb_add*/
3941 0, /*nb_subtract*/
3942 0, /*nb_multiply*/
3943 0, /*nb_divide*/
3944 string_mod, /*nb_remainder*/
3945};
3946
3947
Guido van Rossumcacfc072002-05-24 19:01:59 +00003948PyTypeObject PyBaseString_Type = {
3949 PyObject_HEAD_INIT(&PyType_Type)
3950 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003951 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003952 0,
3953 0,
3954 0, /* tp_dealloc */
3955 0, /* tp_print */
3956 0, /* tp_getattr */
3957 0, /* tp_setattr */
3958 0, /* tp_compare */
3959 0, /* tp_repr */
3960 0, /* tp_as_number */
3961 0, /* tp_as_sequence */
3962 0, /* tp_as_mapping */
3963 0, /* tp_hash */
3964 0, /* tp_call */
3965 0, /* tp_str */
3966 0, /* tp_getattro */
3967 0, /* tp_setattro */
3968 0, /* tp_as_buffer */
3969 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3970 basestring_doc, /* tp_doc */
3971 0, /* tp_traverse */
3972 0, /* tp_clear */
3973 0, /* tp_richcompare */
3974 0, /* tp_weaklistoffset */
3975 0, /* tp_iter */
3976 0, /* tp_iternext */
3977 0, /* tp_methods */
3978 0, /* tp_members */
3979 0, /* tp_getset */
3980 &PyBaseObject_Type, /* tp_base */
3981 0, /* tp_dict */
3982 0, /* tp_descr_get */
3983 0, /* tp_descr_set */
3984 0, /* tp_dictoffset */
3985 0, /* tp_init */
3986 0, /* tp_alloc */
3987 basestring_new, /* tp_new */
3988 0, /* tp_free */
3989};
3990
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003991PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003992"str(object) -> string\n\
3993\n\
3994Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003995If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003996
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003997PyTypeObject PyString_Type = {
3998 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003999 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004000 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004001 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004002 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004003 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004004 (printfunc)string_print, /* tp_print */
4005 0, /* tp_getattr */
4006 0, /* tp_setattr */
4007 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004008 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004009 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004010 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004011 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004012 (hashfunc)string_hash, /* tp_hash */
4013 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004014 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004015 PyObject_GenericGetAttr, /* tp_getattro */
4016 0, /* tp_setattro */
4017 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004018 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004019 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004020 string_doc, /* tp_doc */
4021 0, /* tp_traverse */
4022 0, /* tp_clear */
4023 (richcmpfunc)string_richcompare, /* tp_richcompare */
4024 0, /* tp_weaklistoffset */
4025 0, /* tp_iter */
4026 0, /* tp_iternext */
4027 string_methods, /* tp_methods */
4028 0, /* tp_members */
4029 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004030 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004031 0, /* tp_dict */
4032 0, /* tp_descr_get */
4033 0, /* tp_descr_set */
4034 0, /* tp_dictoffset */
4035 0, /* tp_init */
4036 0, /* tp_alloc */
4037 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004038 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004039};
4040
4041void
Fred Drakeba096332000-07-09 07:04:36 +00004042PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004043{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004044 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004045 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004046 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004047 if (w == NULL || !PyString_Check(*pv)) {
4048 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004049 *pv = NULL;
4050 return;
4051 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004052 v = string_concat((PyStringObject *) *pv, w);
4053 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004054 *pv = v;
4055}
4056
Guido van Rossum013142a1994-08-30 08:19:36 +00004057void
Fred Drakeba096332000-07-09 07:04:36 +00004058PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004059{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004060 PyString_Concat(pv, w);
4061 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004062}
4063
4064
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004065/* The following function breaks the notion that strings are immutable:
4066 it changes the size of a string. We get away with this only if there
4067 is only one module referencing the object. You can also think of it
4068 as creating a new string object and destroying the old one, only
4069 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004070 already be known to some other part of the code...
4071 Note that if there's not enough memory to resize the string, the original
4072 string object at *pv is deallocated, *pv is set to NULL, an "out of
4073 memory" exception is set, and -1 is returned. Else (on success) 0 is
4074 returned, and the value in *pv may or may not be the same as on input.
4075 As always, an extra byte is allocated for a trailing \0 byte (newsize
4076 does *not* include that), and a trailing \0 byte is stored.
4077*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004078
4079int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004080_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004081{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004082 register PyObject *v;
4083 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004084 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004085 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4086 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004087 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004088 Py_DECREF(v);
4089 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004090 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004091 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004092 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004093 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004094 _Py_ForgetReference(v);
4095 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004096 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004097 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004098 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004099 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004100 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004102 _Py_NewReference(*pv);
4103 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004104 sv->ob_size = newsize;
4105 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004106 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004107 return 0;
4108}
Guido van Rossume5372401993-03-16 12:15:04 +00004109
4110/* Helpers for formatstring */
4111
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004112Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004113getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004114{
Thomas Wouters977485d2006-02-16 15:59:12 +00004115 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004116 if (argidx < arglen) {
4117 (*p_argidx)++;
4118 if (arglen < 0)
4119 return args;
4120 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004121 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004122 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004123 PyErr_SetString(PyExc_TypeError,
4124 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004125 return NULL;
4126}
4127
/* Format flag bits.  Each bit records that the corresponding flag
 * character appeared in a conversion specifier:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 * The values are distinct powers of two so they can be OR-ed together.
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4140
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004141Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004142formatfloat(char *buf, size_t buflen, int flags,
4143 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004144{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004145 /* fmt = '%#.' + `prec` + `type`
4146 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004147 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004148 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004149 x = PyFloat_AsDouble(v);
4150 if (x == -1.0 && PyErr_Occurred()) {
4151 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004152 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004153 }
Guido van Rossume5372401993-03-16 12:15:04 +00004154 if (prec < 0)
4155 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004156 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4157 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004158 /* Worst case length calc to ensure no buffer overrun:
4159
4160 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004161 fmt = %#.<prec>g
4162 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004163 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004164 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004165
4166 'f' formats:
4167 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4168 len = 1 + 50 + 1 + prec = 52 + prec
4169
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004170 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004171 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004172
4173 */
4174 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4175 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004176 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004177 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004178 return -1;
4179 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004180 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4181 (flags&F_ALT) ? "#" : "",
4182 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004183 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004184 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004185}
4186
Tim Peters38fd5b62000-09-21 05:43:11 +00004187/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4188 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4189 * Python's regular ints.
4190 * Return value: a new PyString*, or NULL if error.
4191 * . *pbuf is set to point into it,
4192 * *plen set to the # of chars following that.
4193 * Caller must decref it when done using pbuf.
4194 * The string starting at *pbuf is of the form
4195 * "-"? ("0x" | "0X")? digit+
4196 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004197 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004198 * There will be at least prec digits, zero-filled on the left if
4199 * necessary to get that many.
4200 * val object to be converted
4201 * flags bitmask of format flags; only F_ALT is looked at
4202 * prec minimum number of digits; 0-fill on left if needed
4203 * type a character in [duoxX]; u acts the same as d
4204 *
4205 * CAUTION: o, x and X conversions on regular ints can never
4206 * produce a '-' sign, but can for Python's unbounded ints.
4207 */
4208PyObject*
4209_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4210 char **pbuf, int *plen)
4211{
4212 PyObject *result = NULL;
4213 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004214 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004215 int sign; /* 1 if '-', else 0 */
4216 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004217 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004218 int numdigits; /* len == numnondigits + numdigits */
4219 int numnondigits = 0;
4220
4221 switch (type) {
4222 case 'd':
4223 case 'u':
4224 result = val->ob_type->tp_str(val);
4225 break;
4226 case 'o':
4227 result = val->ob_type->tp_as_number->nb_oct(val);
4228 break;
4229 case 'x':
4230 case 'X':
4231 numnondigits = 2;
4232 result = val->ob_type->tp_as_number->nb_hex(val);
4233 break;
4234 default:
4235 assert(!"'type' not in [duoxX]");
4236 }
4237 if (!result)
4238 return NULL;
4239
Neal Norwitz56423e52006-08-13 18:11:08 +00004240 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004241 if (!buf) {
4242 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004243 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004244 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004245
Tim Peters38fd5b62000-09-21 05:43:11 +00004246 /* To modify the string in-place, there can only be one reference. */
4247 if (result->ob_refcnt != 1) {
4248 PyErr_BadInternalCall();
4249 return NULL;
4250 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004251 llen = PyString_Size(result);
Armin Rigo4b63c212006-10-04 11:44:06 +00004252 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004253 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4254 return NULL;
4255 }
4256 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004257 if (buf[len-1] == 'L') {
4258 --len;
4259 buf[len] = '\0';
4260 }
4261 sign = buf[0] == '-';
4262 numnondigits += sign;
4263 numdigits = len - numnondigits;
4264 assert(numdigits > 0);
4265
Tim Petersfff53252001-04-12 18:38:48 +00004266 /* Get rid of base marker unless F_ALT */
4267 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004268 /* Need to skip 0x, 0X or 0. */
4269 int skipped = 0;
4270 switch (type) {
4271 case 'o':
4272 assert(buf[sign] == '0');
4273 /* If 0 is only digit, leave it alone. */
4274 if (numdigits > 1) {
4275 skipped = 1;
4276 --numdigits;
4277 }
4278 break;
4279 case 'x':
4280 case 'X':
4281 assert(buf[sign] == '0');
4282 assert(buf[sign + 1] == 'x');
4283 skipped = 2;
4284 numnondigits -= 2;
4285 break;
4286 }
4287 if (skipped) {
4288 buf += skipped;
4289 len -= skipped;
4290 if (sign)
4291 buf[0] = '-';
4292 }
4293 assert(len == numnondigits + numdigits);
4294 assert(numdigits > 0);
4295 }
4296
4297 /* Fill with leading zeroes to meet minimum width. */
4298 if (prec > numdigits) {
4299 PyObject *r1 = PyString_FromStringAndSize(NULL,
4300 numnondigits + prec);
4301 char *b1;
4302 if (!r1) {
4303 Py_DECREF(result);
4304 return NULL;
4305 }
4306 b1 = PyString_AS_STRING(r1);
4307 for (i = 0; i < numnondigits; ++i)
4308 *b1++ = *buf++;
4309 for (i = 0; i < prec - numdigits; i++)
4310 *b1++ = '0';
4311 for (i = 0; i < numdigits; i++)
4312 *b1++ = *buf++;
4313 *b1 = '\0';
4314 Py_DECREF(result);
4315 result = r1;
4316 buf = PyString_AS_STRING(result);
4317 len = numnondigits + prec;
4318 }
4319
4320 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004321 if (type == 'X') {
4322 /* Need to convert all lower case letters to upper case.
4323 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004324 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004325 if (buf[i] >= 'a' && buf[i] <= 'x')
4326 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004327 }
4328 *pbuf = buf;
4329 *plen = len;
4330 return result;
4331}
4332
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004333Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004334formatint(char *buf, size_t buflen, int flags,
4335 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004336{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004337 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004338 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4339 + 1 + 1 = 24 */
4340 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004341 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004342 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004343
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004344 x = PyInt_AsLong(v);
4345 if (x == -1 && PyErr_Occurred()) {
4346 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004347 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004348 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004349 if (x < 0 && type == 'u') {
4350 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004351 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004352 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4353 sign = "-";
4354 else
4355 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004356 if (prec < 0)
4357 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004358
4359 if ((flags & F_ALT) &&
4360 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004361 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004362 * of issues that cause pain:
4363 * - when 0 is being converted, the C standard leaves off
4364 * the '0x' or '0X', which is inconsistent with other
4365 * %#x/%#X conversions and inconsistent with Python's
4366 * hex() function
4367 * - there are platforms that violate the standard and
4368 * convert 0 with the '0x' or '0X'
4369 * (Metrowerks, Compaq Tru64)
4370 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004371 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004372 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004373 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004374 * We can achieve the desired consistency by inserting our
4375 * own '0x' or '0X' prefix, and substituting %x/%X in place
4376 * of %#x/%#X.
4377 *
4378 * Note that this is the same approach as used in
4379 * formatint() in unicodeobject.c
4380 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004381 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4382 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004383 }
4384 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004385 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4386 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004387 prec, type);
4388 }
4389
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004390 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4391 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004392 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004393 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004394 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004395 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004396 return -1;
4397 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004398 if (sign[0])
4399 PyOS_snprintf(buf, buflen, fmt, -x);
4400 else
4401 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004402 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004403}
4404
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004405Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004406formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004407{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004408 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004409 if (PyString_Check(v)) {
4410 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004411 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004412 }
4413 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004414 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004415 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004416 }
4417 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004418 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004419}
4420
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (for example under sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004430
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004431PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004432PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004433{
4434 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004435 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004436 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004437 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004438 PyObject *result, *orig_args;
4439#ifdef Py_USING_UNICODE
4440 PyObject *v, *w;
4441#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004442 PyObject *dict = NULL;
4443 if (format == NULL || !PyString_Check(format) || args == NULL) {
4444 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004445 return NULL;
4446 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004447 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004448 fmt = PyString_AS_STRING(format);
4449 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004450 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004451 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004452 if (result == NULL)
4453 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004454 res = PyString_AsString(result);
4455 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004456 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004457 argidx = 0;
4458 }
4459 else {
4460 arglen = -1;
4461 argidx = -2;
4462 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004463 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4464 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004465 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004466 while (--fmtcnt >= 0) {
4467 if (*fmt != '%') {
4468 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004469 rescnt = fmtcnt + 100;
4470 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004471 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004472 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004473 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004474 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004475 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004476 }
4477 *res++ = *fmt++;
4478 }
4479 else {
4480 /* Got a format specifier */
4481 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004482 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004483 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004484 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004485 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004486 PyObject *v = NULL;
4487 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004488 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004489 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004490 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004491 char formatbuf[FORMATBUFLEN];
4492 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004493#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004494 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004495 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004496#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004497
Guido van Rossumda9c2711996-12-05 21:58:58 +00004498 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004499 if (*fmt == '(') {
4500 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004501 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004502 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004503 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004504
4505 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004506 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004507 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004508 goto error;
4509 }
4510 ++fmt;
4511 --fmtcnt;
4512 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004513 /* Skip over balanced parentheses */
4514 while (pcount > 0 && --fmtcnt >= 0) {
4515 if (*fmt == ')')
4516 --pcount;
4517 else if (*fmt == '(')
4518 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004519 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004520 }
4521 keylen = fmt - keystart - 1;
4522 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004523 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004524 "incomplete format key");
4525 goto error;
4526 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004527 key = PyString_FromStringAndSize(keystart,
4528 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004529 if (key == NULL)
4530 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004531 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004532 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004533 args_owned = 0;
4534 }
4535 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004536 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004537 if (args == NULL) {
4538 goto error;
4539 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004540 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004541 arglen = -1;
4542 argidx = -2;
4543 }
Guido van Rossume5372401993-03-16 12:15:04 +00004544 while (--fmtcnt >= 0) {
4545 switch (c = *fmt++) {
4546 case '-': flags |= F_LJUST; continue;
4547 case '+': flags |= F_SIGN; continue;
4548 case ' ': flags |= F_BLANK; continue;
4549 case '#': flags |= F_ALT; continue;
4550 case '0': flags |= F_ZERO; continue;
4551 }
4552 break;
4553 }
4554 if (c == '*') {
4555 v = getnextarg(args, arglen, &argidx);
4556 if (v == NULL)
4557 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004558 if (!PyInt_Check(v)) {
4559 PyErr_SetString(PyExc_TypeError,
4560 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004561 goto error;
4562 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004563 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004564 if (width < 0) {
4565 flags |= F_LJUST;
4566 width = -width;
4567 }
Guido van Rossume5372401993-03-16 12:15:04 +00004568 if (--fmtcnt >= 0)
4569 c = *fmt++;
4570 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004571 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004572 width = c - '0';
4573 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004574 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004575 if (!isdigit(c))
4576 break;
4577 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004578 PyErr_SetString(
4579 PyExc_ValueError,
4580 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004581 goto error;
4582 }
4583 width = width*10 + (c - '0');
4584 }
4585 }
4586 if (c == '.') {
4587 prec = 0;
4588 if (--fmtcnt >= 0)
4589 c = *fmt++;
4590 if (c == '*') {
4591 v = getnextarg(args, arglen, &argidx);
4592 if (v == NULL)
4593 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004594 if (!PyInt_Check(v)) {
4595 PyErr_SetString(
4596 PyExc_TypeError,
4597 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004598 goto error;
4599 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004600 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004601 if (prec < 0)
4602 prec = 0;
4603 if (--fmtcnt >= 0)
4604 c = *fmt++;
4605 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004606 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004607 prec = c - '0';
4608 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004609 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004610 if (!isdigit(c))
4611 break;
4612 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004613 PyErr_SetString(
4614 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004615 "prec too big");
4616 goto error;
4617 }
4618 prec = prec*10 + (c - '0');
4619 }
4620 }
4621 } /* prec */
4622 if (fmtcnt >= 0) {
4623 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004624 if (--fmtcnt >= 0)
4625 c = *fmt++;
4626 }
4627 }
4628 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004629 PyErr_SetString(PyExc_ValueError,
4630 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004631 goto error;
4632 }
4633 if (c != '%') {
4634 v = getnextarg(args, arglen, &argidx);
4635 if (v == NULL)
4636 goto error;
4637 }
4638 sign = 0;
4639 fill = ' ';
4640 switch (c) {
4641 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004642 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004643 len = 1;
4644 break;
4645 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004646#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004647 if (PyUnicode_Check(v)) {
4648 fmt = fmt_start;
4649 argidx = argidx_start;
4650 goto unicode;
4651 }
Georg Brandld45014b2005-10-01 17:06:00 +00004652#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004653 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004654#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004655 if (temp != NULL && PyUnicode_Check(temp)) {
4656 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004657 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004658 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004659 goto unicode;
4660 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004661#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004662 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004663 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004664 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004665 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004666 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004667 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004668 if (!PyString_Check(temp)) {
4669 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004670 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004671 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004672 goto error;
4673 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004674 pbuf = PyString_AS_STRING(temp);
4675 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004676 if (prec >= 0 && len > prec)
4677 len = prec;
4678 break;
4679 case 'i':
4680 case 'd':
4681 case 'u':
4682 case 'o':
4683 case 'x':
4684 case 'X':
4685 if (c == 'i')
4686 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004687 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004688 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004689 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004690 prec, c, &pbuf, &ilen);
4691 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004692 if (!temp)
4693 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004694 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004695 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004696 else {
4697 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004698 len = formatint(pbuf,
4699 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004700 flags, prec, c, v);
4701 if (len < 0)
4702 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004703 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004704 }
4705 if (flags & F_ZERO)
4706 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004707 break;
4708 case 'e':
4709 case 'E':
4710 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004711 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004712 case 'g':
4713 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004714 if (c == 'F')
4715 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004716 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004717 len = formatfloat(pbuf, sizeof(formatbuf),
4718 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004719 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004720 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004721 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004722 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004723 fill = '0';
4724 break;
4725 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004726#ifdef Py_USING_UNICODE
4727 if (PyUnicode_Check(v)) {
4728 fmt = fmt_start;
4729 argidx = argidx_start;
4730 goto unicode;
4731 }
4732#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004733 pbuf = formatbuf;
4734 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004735 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004736 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004737 break;
4738 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004739 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004740 "unsupported format character '%c' (0x%x) "
Armin Rigo4b63c212006-10-04 11:44:06 +00004741 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004742 c, c,
Armin Rigo4b63c212006-10-04 11:44:06 +00004743 (Py_ssize_t)(fmt - 1 -
4744 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004745 goto error;
4746 }
4747 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004748 if (*pbuf == '-' || *pbuf == '+') {
4749 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004750 len--;
4751 }
4752 else if (flags & F_SIGN)
4753 sign = '+';
4754 else if (flags & F_BLANK)
4755 sign = ' ';
4756 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004757 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004758 }
4759 if (width < len)
4760 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004761 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004762 reslen -= rescnt;
4763 rescnt = width + fmtcnt + 100;
4764 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004765 if (reslen < 0) {
4766 Py_DECREF(result);
Georg Brandl5f795862007-02-26 13:51:34 +00004767 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004768 return PyErr_NoMemory();
4769 }
Georg Brandl5f795862007-02-26 13:51:34 +00004770 if (_PyString_Resize(&result, reslen) < 0) {
4771 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004772 return NULL;
Georg Brandl5f795862007-02-26 13:51:34 +00004773 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004774 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004775 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004776 }
4777 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004778 if (fill != ' ')
4779 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004780 rescnt--;
4781 if (width > len)
4782 width--;
4783 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004784 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4785 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004786 assert(pbuf[1] == c);
4787 if (fill != ' ') {
4788 *res++ = *pbuf++;
4789 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004790 }
Tim Petersfff53252001-04-12 18:38:48 +00004791 rescnt -= 2;
4792 width -= 2;
4793 if (width < 0)
4794 width = 0;
4795 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004796 }
4797 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004798 do {
4799 --rescnt;
4800 *res++ = fill;
4801 } while (--width > len);
4802 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004803 if (fill == ' ') {
4804 if (sign)
4805 *res++ = sign;
4806 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004807 (c == 'x' || c == 'X')) {
4808 assert(pbuf[0] == '0');
4809 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004810 *res++ = *pbuf++;
4811 *res++ = *pbuf++;
4812 }
4813 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004814 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004815 res += len;
4816 rescnt -= len;
4817 while (--width >= len) {
4818 --rescnt;
4819 *res++ = ' ';
4820 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004821 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004822 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004823 "not all arguments converted during string formatting");
Georg Brandl5f795862007-02-26 13:51:34 +00004824 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004825 goto error;
4826 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004827 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004828 } /* '%' */
4829 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004830 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004831 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004832 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004833 goto error;
4834 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004835 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004836 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004837 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004838 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004839 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004840
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004841#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004842 unicode:
4843 if (args_owned) {
4844 Py_DECREF(args);
4845 args_owned = 0;
4846 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004847 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004848 if (PyTuple_Check(orig_args) && argidx > 0) {
4849 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004850 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004851 v = PyTuple_New(n);
4852 if (v == NULL)
4853 goto error;
4854 while (--n >= 0) {
4855 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4856 Py_INCREF(w);
4857 PyTuple_SET_ITEM(v, n, w);
4858 }
4859 args = v;
4860 } else {
4861 Py_INCREF(orig_args);
4862 args = orig_args;
4863 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004864 args_owned = 1;
4865 /* Take what we have of the result and let the Unicode formatting
4866 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004867 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004868 if (_PyString_Resize(&result, rescnt))
4869 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004870 fmtcnt = PyString_GET_SIZE(format) - \
4871 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004872 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4873 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004874 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004875 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004876 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004877 if (v == NULL)
4878 goto error;
4879 /* Paste what we have (result) to what the Unicode formatting
4880 function returned (v) and return the result (or error) */
4881 w = PyUnicode_Concat(result, v);
4882 Py_DECREF(result);
4883 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004884 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004885 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004886#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004887
Guido van Rossume5372401993-03-16 12:15:04 +00004888 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004889 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004890 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004891 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004892 }
Guido van Rossume5372401993-03-16 12:15:04 +00004893 return NULL;
4894}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004895
Guido van Rossum2a61e741997-01-18 07:55:05 +00004896void
Fred Drakeba096332000-07-09 07:04:36 +00004897PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004898{
4899 register PyStringObject *s = (PyStringObject *)(*p);
4900 PyObject *t;
4901 if (s == NULL || !PyString_Check(s))
4902 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004903 /* If it's a string subclass, we don't really know what putting
4904 it in the interned dict might do. */
4905 if (!PyString_CheckExact(s))
4906 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004907 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004908 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004909 if (interned == NULL) {
4910 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004911 if (interned == NULL) {
4912 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004913 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004914 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004915 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004916 t = PyDict_GetItem(interned, (PyObject *)s);
4917 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004918 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004919 Py_DECREF(*p);
4920 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004921 return;
4922 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004923
Armin Rigo79f7ad22004-08-07 19:27:39 +00004924 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004925 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004926 return;
4927 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004928 /* The two references in interned are not counted by refcnt.
4929 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004930 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004931 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004932}
4933
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004934void
4935PyString_InternImmortal(PyObject **p)
4936{
4937 PyString_InternInPlace(p);
4938 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4939 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4940 Py_INCREF(*p);
4941 }
4942}
4943
Guido van Rossum2a61e741997-01-18 07:55:05 +00004944
4945PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004946PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947{
4948 PyObject *s = PyString_FromString(cp);
4949 if (s == NULL)
4950 return NULL;
4951 PyString_InternInPlace(&s);
4952 return s;
4953}
4954
Guido van Rossum8cf04761997-08-02 02:57:45 +00004955void
Fred Drakeba096332000-07-09 07:04:36 +00004956PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004957{
4958 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004959 for (i = 0; i < UCHAR_MAX + 1; i++) {
4960 Py_XDECREF(characters[i]);
4961 characters[i] = NULL;
4962 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004963 Py_XDECREF(nullstring);
4964 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004965}
Barry Warsawa903ad982001-02-23 16:40:48 +00004966
Barry Warsawa903ad982001-02-23 16:40:48 +00004967void _Py_ReleaseInternedStrings(void)
4968{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004969 PyObject *keys;
4970 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004971 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004972
4973 if (interned == NULL || !PyDict_Check(interned))
4974 return;
4975 keys = PyDict_Keys(interned);
4976 if (keys == NULL || !PyList_Check(keys)) {
4977 PyErr_Clear();
4978 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004979 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004980
4981 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4982 detector, interned strings are not forcibly deallocated; rather, we
4983 give them their stolen references back, and then clear and DECREF
4984 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004985
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004986 fprintf(stderr, "releasing interned strings\n");
4987 n = PyList_GET_SIZE(keys);
4988 for (i = 0; i < n; i++) {
4989 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4990 switch (s->ob_sstate) {
4991 case SSTATE_NOT_INTERNED:
4992 /* XXX Shouldn't happen */
4993 break;
4994 case SSTATE_INTERNED_IMMORTAL:
4995 s->ob_refcnt += 1;
4996 break;
4997 case SSTATE_INTERNED_MORTAL:
4998 s->ob_refcnt += 2;
4999 break;
5000 default:
5001 Py_FatalError("Inconsistent interned string state.");
5002 }
5003 s->ob_sstate = SSTATE_NOT_INTERNED;
5004 }
5005 Py_DECREF(keys);
5006 PyDict_Clear(interned);
5007 Py_DECREF(interned);
5008 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005009}