blob: 0e3dc4f780e46d716424afbf1cc0dd75060e269c [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000619 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000620 c = (c<<3) + *s++ - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000621 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
630 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
651 }
652 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000653 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000666 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 }
668#ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000673 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000674 "Unicode escapes not legal "
675 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#endif
679 default:
680 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 }
685 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000686 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
692}
693
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000694/* -------------------------------------------------------------------- */
695/* object api */
696
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698string_getsize(register PyObject *op)
699{
700 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
705}
706
707static /*const*/ char *
708string_getbuffer(register PyObject *op)
709{
710 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000711 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
715}
716
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000718PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 if (!PyString_Check(op))
721 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723}
724
725/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000726PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728 if (!PyString_Check(op))
729 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731}
732
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733int
734PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737{
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
741 }
742
743 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
749 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000750 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#endif
752 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
755 "%.200s found", obj->ob_type->tp_name);
756 return -1;
757 }
758 }
759
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
767 }
768 return 0;
769}
770
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000772/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
Fredrik Lundha50d2012006-05-26 17:04:58 +0000774#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000775
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000776#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000777#define STRINGLIB_LEN PyString_GET_SIZE
778#define STRINGLIB_NEW PyString_FromStringAndSize
779#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000780
Fredrik Lundhb9479482006-05-26 17:22:38 +0000781#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Fredrik Lundha50d2012006-05-26 17:04:58 +0000783#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000785#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000786#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000787#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000788
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790static int
Fred Drakeba096332000-07-09 07:04:36 +0000791string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000793 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000796
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000797 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 if (flags & Py_PRINT_RAW) {
Armin Rigo4b63c212006-10-04 11:44:06 +0000809 char *data = op->ob_sval;
810 Py_ssize_t size = op->ob_size;
811 while (size > INT_MAX) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory aligment issues.
815 */
816 const int chunk_size = INT_MAX & ~0x3FFF;
817 fwrite(data, 1, chunk_size, fp);
818 data += chunk_size;
819 size -= chunk_size;
820 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#ifdef __VMS
Armin Rigo4b63c212006-10-04 11:44:06 +0000822 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#else
Armin Rigo4b63c212006-10-04 11:44:06 +0000824 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000825#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000826 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828
Thomas Wouters7e474022000-07-16 12:04:32 +0000829 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000831 if (memchr(op->ob_sval, '\'', op->ob_size) &&
832 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 quote = '"';
834
835 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 for (i = 0; i < op->ob_size; i++) {
837 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000838 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000844 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 fprintf(fp, "\\r");
846 else if (c < ' ' || c >= 0x7f)
847 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000848 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000849 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000852 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853}
854
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000855PyObject *
856PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000859 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyObject *v;
Armin Rigo4b63c212006-10-04 11:44:06 +0000861 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000862 PyErr_SetString(PyExc_OverflowError,
863 "string is too large to make repr");
864 }
865 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000867 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000868 }
869 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000870 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 register char c;
872 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000873 int quote;
874
Thomas Wouters7e474022000-07-16 12:04:32 +0000875 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000876 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000877 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000878 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000879 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000880 quote = '"';
881
Tim Peters9161c8b2001-12-03 01:55:38 +0000882 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000883 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000885 /* There's at least enough room for a hex escape
886 and a closing quote. */
887 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000891 else if (c == '\t')
892 *p++ = '\\', *p++ = 't';
893 else if (c == '\n')
894 *p++ = '\\', *p++ = 'n';
895 else if (c == '\r')
896 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000897 else if (c < ' ' || c >= 0x7f) {
898 /* For performance, we don't want to call
899 PyOS_snprintf here (extra layers of
900 function call). */
901 sprintf(p, "\\x%02x", c & 0xff);
902 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000903 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000904 else
905 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000907 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000908 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000910 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000911 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000912 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000913 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914}
915
Guido van Rossum189f1df2001-05-01 16:51:53 +0000916static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000917string_repr(PyObject *op)
918{
919 return PyString_Repr(op, 1);
920}
921
922static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000923string_str(PyObject *s)
924{
Tim Petersc9933152001-10-16 20:18:24 +0000925 assert(PyString_Check(s));
926 if (PyString_CheckExact(s)) {
927 Py_INCREF(s);
928 return s;
929 }
930 else {
931 /* Subtype -- return genuine string with the same value. */
932 PyStringObject *t = (PyStringObject *) s;
933 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
934 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000935}
936
Martin v. Löwis18e16552006-02-15 17:27:45 +0000937static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000938string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939{
940 return a->ob_size;
941}
942
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000943static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000944string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945{
Andrew Dalke598710c2006-05-25 18:18:39 +0000946 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 register PyStringObject *op;
948 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000949#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000950 if (PyUnicode_Check(bb))
951 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000952#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000953 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000954 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000955 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000956 return NULL;
957 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000959 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000960 if ((a->ob_size == 0 || b->ob_size == 0) &&
961 PyString_CheckExact(a) && PyString_CheckExact(b)) {
962 if (a->ob_size == 0) {
963 Py_INCREF(bb);
964 return bb;
965 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000966 Py_INCREF(a);
967 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 }
969 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000970 if (size < 0) {
971 PyErr_SetString(PyExc_OverflowError,
972 "strings are too large to concat");
973 return NULL;
974 }
975
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000976 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000977 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000978 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000979 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000980 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000981 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000982 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000983 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
984 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000985 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987#undef b
988}
989
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000990static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000991string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000993 register Py_ssize_t i;
994 register Py_ssize_t j;
995 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000997 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 if (n < 0)
999 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001000 /* watch out for overflows: the size can overflow int,
1001 * and the # of bytes needed can overflow size_t
1002 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001004 if (n && size / n != a->ob_size) {
1005 PyErr_SetString(PyExc_OverflowError,
1006 "repeated string is too long");
1007 return NULL;
1008 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001009 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010 Py_INCREF(a);
1011 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001012 }
Tim Peterse7c05322004-06-27 17:24:49 +00001013 nbytes = (size_t)size;
1014 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001015 PyErr_SetString(PyExc_OverflowError,
1016 "repeated string is too long");
1017 return NULL;
1018 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001020 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001021 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001022 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001023 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001024 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001025 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001026 op->ob_sval[size] = '\0';
1027 if (a->ob_size == 1 && n > 0) {
1028 memset(op->ob_sval, a->ob_sval[0] , n);
1029 return (PyObject *) op;
1030 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001031 i = 0;
1032 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001033 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001035 }
1036 while (i < size) {
1037 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001038 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001039 i += j;
1040 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001041 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042}
1043
1044/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1045
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001047string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001048 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001049 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050{
1051 if (i < 0)
1052 i = 0;
1053 if (j < 0)
1054 j = 0; /* Avoid signed/unsigned bug in next line */
1055 if (j > a->ob_size)
1056 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001057 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1058 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001059 Py_INCREF(a);
1060 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001061 }
1062 if (j < i)
1063 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001064 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001065}
1066
Guido van Rossum9284a572000-03-07 15:53:43 +00001067static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001069{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001070 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001071#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001072 if (PyUnicode_Check(sub_obj))
1073 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001074#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001075 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001076 PyErr_SetString(PyExc_TypeError,
1077 "'in <string>' requires string as left operand");
1078 return -1;
1079 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001080 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001081
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001082 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001083}
1084
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001085static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001086string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001087{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001088 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001089 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001090 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001091 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092 return NULL;
1093 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001094 pchar = a->ob_sval[i];
1095 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001096 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001097 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001098 else {
1099#ifdef COUNT_ALLOCS
1100 one_strings++;
1101#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001102 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001103 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001104 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001105}
1106
Martin v. Löwiscd353062001-05-24 16:56:35 +00001107static PyObject*
1108string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001109{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001110 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001111 Py_ssize_t len_a, len_b;
1112 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001113 PyObject *result;
1114
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001115 /* Make sure both arguments are strings. */
1116 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117 result = Py_NotImplemented;
1118 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001119 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001120 if (a == b) {
1121 switch (op) {
1122 case Py_EQ:case Py_LE:case Py_GE:
1123 result = Py_True;
1124 goto out;
1125 case Py_NE:case Py_LT:case Py_GT:
1126 result = Py_False;
1127 goto out;
1128 }
1129 }
1130 if (op == Py_EQ) {
1131 /* Supporting Py_NE here as well does not save
1132 much time, since Py_NE is rarely used. */
1133 if (a->ob_size == b->ob_size
1134 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001135 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001136 a->ob_size) == 0)) {
1137 result = Py_True;
1138 } else {
1139 result = Py_False;
1140 }
1141 goto out;
1142 }
1143 len_a = a->ob_size; len_b = b->ob_size;
1144 min_len = (len_a < len_b) ? len_a : len_b;
1145 if (min_len > 0) {
1146 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1147 if (c==0)
1148 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1149 }else
1150 c = 0;
1151 if (c == 0)
1152 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1153 switch (op) {
1154 case Py_LT: c = c < 0; break;
1155 case Py_LE: c = c <= 0; break;
1156 case Py_EQ: assert(0); break; /* unreachable */
1157 case Py_NE: c = c != 0; break;
1158 case Py_GT: c = c > 0; break;
1159 case Py_GE: c = c >= 0; break;
1160 default:
1161 result = Py_NotImplemented;
1162 goto out;
1163 }
1164 result = c ? Py_True : Py_False;
1165 out:
1166 Py_INCREF(result);
1167 return result;
1168}
1169
1170int
1171_PyString_Eq(PyObject *o1, PyObject *o2)
1172{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001173 PyStringObject *a = (PyStringObject*) o1;
1174 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001175 return a->ob_size == b->ob_size
1176 && *a->ob_sval == *b->ob_sval
1177 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001178}
1179
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180static long
Fred Drakeba096332000-07-09 07:04:36 +00001181string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001182{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001183 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 register unsigned char *p;
1185 register long x;
1186
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001187 if (a->ob_shash != -1)
1188 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001189 len = a->ob_size;
1190 p = (unsigned char *) a->ob_sval;
1191 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001192 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001193 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001194 x ^= a->ob_size;
1195 if (x == -1)
1196 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001197 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001198 return x;
1199}
1200
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201static PyObject*
1202string_subscript(PyStringObject* self, PyObject* item)
1203{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001204 if (PyIndex_Check(item)) {
1205 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 if (i == -1 && PyErr_Occurred())
1207 return NULL;
1208 if (i < 0)
1209 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001210 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001211 }
1212 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001213 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214 char* source_buf;
1215 char* result_buf;
1216 PyObject* result;
1217
Tim Petersae1d0c92006-03-17 03:29:34 +00001218 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 PyString_GET_SIZE(self),
1220 &start, &stop, &step, &slicelength) < 0) {
1221 return NULL;
1222 }
1223
1224 if (slicelength <= 0) {
1225 return PyString_FromStringAndSize("", 0);
1226 }
1227 else {
1228 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001229 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001230 if (result_buf == NULL)
1231 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232
Tim Petersae1d0c92006-03-17 03:29:34 +00001233 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 cur += step, i++) {
1235 result_buf[i] = source_buf[cur];
1236 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001237
1238 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001239 slicelength);
1240 PyMem_Free(result_buf);
1241 return result;
1242 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001243 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001245 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246 "string indices must be integers");
1247 return NULL;
1248 }
1249}
1250
Martin v. Löwis18e16552006-02-15 17:27:45 +00001251static Py_ssize_t
1252string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001253{
1254 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001255 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001256 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257 return -1;
1258 }
1259 *ptr = (void *)self->ob_sval;
1260 return self->ob_size;
1261}
1262
Martin v. Löwis18e16552006-02-15 17:27:45 +00001263static Py_ssize_t
1264string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001265{
Guido van Rossum045e6881997-09-08 18:30:11 +00001266 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001267 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268 return -1;
1269}
1270
Martin v. Löwis18e16552006-02-15 17:27:45 +00001271static Py_ssize_t
1272string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001273{
1274 if ( lenp )
1275 *lenp = self->ob_size;
1276 return 1;
1277}
1278
Martin v. Löwis18e16552006-02-15 17:27:45 +00001279static Py_ssize_t
1280string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001281{
1282 if ( index != 0 ) {
1283 PyErr_SetString(PyExc_SystemError,
1284 "accessing non-existent string segment");
1285 return -1;
1286 }
1287 *ptr = self->ob_sval;
1288 return self->ob_size;
1289}
1290
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001291static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001293 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001294 (ssizeargfunc)string_repeat, /*sq_repeat*/
1295 (ssizeargfunc)string_item, /*sq_item*/
1296 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001297 0, /*sq_ass_item*/
1298 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001299 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001300};
1301
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001302static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001303 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001304 (binaryfunc)string_subscript,
1305 0,
1306};
1307
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001308static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001309 (readbufferproc)string_buffer_getreadbuf,
1310 (writebufferproc)string_buffer_getwritebuf,
1311 (segcountproc)string_buffer_getsegcount,
1312 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001313};
1314
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315
1316
/* Selectors for the three strip variants; each is also the index of the
   matching entry in stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* The bare method name: the format string without its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001325
Andrew Dalke525eab32006-05-26 14:00:45 +00001326
/* True iff the `length' bytes of `target' starting at `offset' equal the
   first `length' bytes of `pattern'.  The first and last bytes are
   compared directly and memcmp() handles what lies between them.

   Don't call if length < 2 (the memcmp count would be negative).

   Every parameter is parenthesized so that expression arguments such as
   `p + 1' or `n - 1' expand correctly; arguments are evaluated more than
   once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
1332
1333
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit' splits:
   5 splits gives 6 elements, capped at MAX_PREALLOC.  The parameter is
   parenthesized so that expression arguments (e.g. `a << b' or `c ? x : y')
   are evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1346
/* Append the substring data[left:right] to `list' as a new string.

   Relies on names from the expansion site: the variables `str' and `list'
   and the label `onError', which is jumped to when creating the string or
   appending it fails.  The temporary reference held in `str' is released
   in every case.

   The body is wrapped in braces (like SPLIT_ADD) so that the macro acts as
   one statement, e.g. as the body of an unbraced `if'. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }
1358
/* Add the substring data[left:right] to `list' as element number `count',
   then increment `count'.

   The first MAX_PREALLOC elements are stored directly with
   PyList_SET_ITEM(); later ones are appended with PyList_Append().
   Relies on names from the expansion site: the variables `str', `list'
   and `count', and the label `onError', which is jumped to on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        int append_failed = PyList_Append(list, str);           \
        Py_DECREF(str);                                         \
        if (append_failed)                                      \
            goto onError;                                       \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001375
/* Always force the list to the expected size: set its ob_size to the
   `count' variable of the expansion site.  The parameter is parenthesized
   because a cast binds tighter than most operators, so an expression
   argument would otherwise be only partly cast. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
Andrew Dalke525eab32006-05-26 14:00:45 +00001378
/* Scanning helpers that move the index `i' (which must be a modifiable
   lvalue) over the buffer `s':
     SKIP_SPACE / SKIP_NONSPACE    advance i while i < len and s[i] is / is
                                   not whitespace;
     RSKIP_SPACE / RSKIP_NONSPACE  step i backwards while i >= 0 and s[i]
                                   is / is not whitespace.
   The `s' and `len' parameters are parenthesized so that expression
   arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1383
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001384Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001385split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001386{
Andrew Dalke525eab32006-05-26 14:00:45 +00001387 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001388 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001389 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390
1391 if (list == NULL)
1392 return NULL;
1393
Andrew Dalke02758d62006-05-26 15:21:01 +00001394 i = j = 0;
1395
1396 while (maxsplit-- > 0) {
1397 SKIP_SPACE(s, i, len);
1398 if (i==len) break;
1399 j = i; i++;
1400 SKIP_NONSPACE(s, i, len);
1401 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001403
1404 if (i < len) {
1405 /* Only occurs when maxsplit was reached */
1406 /* Skip any remaining whitespace and copy to end of string */
1407 SKIP_SPACE(s, i, len);
1408 if (i != len)
1409 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001410 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001411 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001413 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414 Py_DECREF(list);
1415 return NULL;
1416}
1417
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001418Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001419split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001420{
Andrew Dalke525eab32006-05-26 14:00:45 +00001421 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001422 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001423 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001424
1425 if (list == NULL)
1426 return NULL;
1427
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001428 i = j = 0;
1429 while ((j < len) && (maxcount-- > 0)) {
1430 for(; j<len; j++) {
1431 /* I found that using memchr makes no difference */
1432 if (s[j] == ch) {
1433 SPLIT_ADD(s, i, j);
1434 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001435 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001436 }
1437 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001438 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001439 if (i <= len) {
1440 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001441 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001442 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443 return list;
1444
1445 onError:
1446 Py_DECREF(list);
1447 return NULL;
1448}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001450PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001451"S.split([sep [,maxsplit]]) -> list of strings\n\
1452\n\
1453Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001454delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001455splits are done. If sep is not specified or is None, any\n\
1456whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457
1458static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001459string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001461 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001462 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001463 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001464 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001465#ifdef USE_FAST
1466 Py_ssize_t pos;
1467#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468
Martin v. Löwis9c830762006-04-13 08:37:17 +00001469 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001471 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001472 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001473 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001475 if (PyString_Check(subobj)) {
1476 sub = PyString_AS_STRING(subobj);
1477 n = PyString_GET_SIZE(subobj);
1478 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001479#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001480 else if (PyUnicode_Check(subobj))
1481 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001482#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1484 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001485
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486 if (n == 0) {
1487 PyErr_SetString(PyExc_ValueError, "empty separator");
1488 return NULL;
1489 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001490 else if (n == 1)
1491 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492
Andrew Dalke525eab32006-05-26 14:00:45 +00001493 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 if (list == NULL)
1495 return NULL;
1496
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001497#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001499 while (maxsplit-- > 0) {
1500 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1501 if (pos < 0)
1502 break;
1503 j = i+pos;
1504 SPLIT_ADD(s, i, j);
1505 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001507#else
1508 i = j = 0;
1509 while ((j+n <= len) && (maxsplit-- > 0)) {
1510 for (; j+n <= len; j++) {
1511 if (Py_STRING_MATCH(s, j, sub, n)) {
1512 SPLIT_ADD(s, i, j);
1513 i = j = j + n;
1514 break;
1515 }
1516 }
1517 }
1518#endif
1519 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001520 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001521 return list;
1522
Andrew Dalke525eab32006-05-26 14:00:45 +00001523 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524 Py_DECREF(list);
1525 return NULL;
1526}
1527
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001528PyDoc_STRVAR(partition__doc__,
1529"S.partition(sep) -> (head, sep, tail)\n\
1530\n\
1531Searches for the separator sep in S, and returns the part before it,\n\
1532the separator itself, and the part after it. If the separator is not\n\
1533found, returns S and two empty strings.");
1534
1535static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001536string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001537{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001538 const char *sep;
1539 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001540
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001541 if (PyString_Check(sep_obj)) {
1542 sep = PyString_AS_STRING(sep_obj);
1543 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001544 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001545#ifdef Py_USING_UNICODE
1546 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001547 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001548#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001549 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001550 return NULL;
1551
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001552 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001553 (PyObject*) self,
1554 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1555 sep_obj, sep, sep_len
1556 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001557}
1558
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001559PyDoc_STRVAR(rpartition__doc__,
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001560"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001561\n\
1562Searches for the separator sep in S, starting at the end of S, and returns\n\
1563the part before it, the separator itself, and the part after it. If the\n\
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001564separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001565
1566static PyObject *
1567string_rpartition(PyStringObject *self, PyObject *sep_obj)
1568{
1569 const char *sep;
1570 Py_ssize_t sep_len;
1571
1572 if (PyString_Check(sep_obj)) {
1573 sep = PyString_AS_STRING(sep_obj);
1574 sep_len = PyString_GET_SIZE(sep_obj);
1575 }
1576#ifdef Py_USING_UNICODE
1577 else if (PyUnicode_Check(sep_obj))
1578 return PyUnicode_Partition((PyObject *) self, sep_obj);
1579#endif
1580 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1581 return NULL;
1582
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001583 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001584 (PyObject*) self,
1585 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1586 sep_obj, sep, sep_len
1587 );
1588}
1589
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001590Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001591rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001592{
Andrew Dalke525eab32006-05-26 14:00:45 +00001593 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001594 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001595 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001596
1597 if (list == NULL)
1598 return NULL;
1599
Andrew Dalke02758d62006-05-26 15:21:01 +00001600 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001601
Andrew Dalke02758d62006-05-26 15:21:01 +00001602 while (maxsplit-- > 0) {
1603 RSKIP_SPACE(s, i);
1604 if (i<0) break;
1605 j = i; i--;
1606 RSKIP_NONSPACE(s, i);
1607 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001608 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001609 if (i >= 0) {
1610 /* Only occurs when maxsplit was reached */
1611 /* Skip any remaining whitespace and copy to beginning of string */
1612 RSKIP_SPACE(s, i);
1613 if (i >= 0)
1614 SPLIT_ADD(s, 0, i + 1);
1615
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001616 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001617 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001618 if (PyList_Reverse(list) < 0)
1619 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001620 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001621 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001622 Py_DECREF(list);
1623 return NULL;
1624}
1625
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001626Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001627rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001628{
Andrew Dalke525eab32006-05-26 14:00:45 +00001629 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001630 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001631 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001632
1633 if (list == NULL)
1634 return NULL;
1635
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001636 i = j = len - 1;
1637 while ((i >= 0) && (maxcount-- > 0)) {
1638 for (; i >= 0; i--) {
1639 if (s[i] == ch) {
1640 SPLIT_ADD(s, i + 1, j + 1);
1641 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001642 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001643 }
1644 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001645 }
1646 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001647 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001648 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001649 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001650 if (PyList_Reverse(list) < 0)
1651 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001652 return list;
1653
1654 onError:
1655 Py_DECREF(list);
1656 return NULL;
1657}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001658
1659PyDoc_STRVAR(rsplit__doc__,
1660"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1661\n\
1662Return a list of the words in the string S, using sep as the\n\
1663delimiter string, starting at the end of the string and working\n\
1664to the front. If maxsplit is given, at most maxsplit splits are\n\
1665done. If sep is not specified or is None, any whitespace string\n\
1666is a separator.");
1667
1668static PyObject *
1669string_rsplit(PyStringObject *self, PyObject *args)
1670{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001671 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001672 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001673 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001674 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001675
Martin v. Löwis9c830762006-04-13 08:37:17 +00001676 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001677 return NULL;
1678 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001679 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001680 if (subobj == Py_None)
1681 return rsplit_whitespace(s, len, maxsplit);
1682 if (PyString_Check(subobj)) {
1683 sub = PyString_AS_STRING(subobj);
1684 n = PyString_GET_SIZE(subobj);
1685 }
1686#ifdef Py_USING_UNICODE
1687 else if (PyUnicode_Check(subobj))
1688 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1689#endif
1690 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1691 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001692
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693 if (n == 0) {
1694 PyErr_SetString(PyExc_ValueError, "empty separator");
1695 return NULL;
1696 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001697 else if (n == 1)
1698 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001699
Andrew Dalke525eab32006-05-26 14:00:45 +00001700 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 if (list == NULL)
1702 return NULL;
1703
1704 j = len;
1705 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001706
1707 while ( (i >= 0) && (maxsplit-- > 0) ) {
1708 for (; i>=0; i--) {
1709 if (Py_STRING_MATCH(s, i, sub, n)) {
1710 SPLIT_ADD(s, i + n, j);
1711 j = i;
1712 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001713 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001714 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001715 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001716 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001717 SPLIT_ADD(s, 0, j);
1718 FIX_PREALLOC_SIZE(list);
1719 if (PyList_Reverse(list) < 0)
1720 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001721 return list;
1722
Andrew Dalke525eab32006-05-26 14:00:45 +00001723onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001724 Py_DECREF(list);
1725 return NULL;
1726}
1727
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001729PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730"S.join(sequence) -> string\n\
1731\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001732Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001733sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001734
1735static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001736string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737{
1738 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001739 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001742 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001743 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001744 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001745 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746
Tim Peters19fe14e2001-01-19 03:03:47 +00001747 seq = PySequence_Fast(orig, "");
1748 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001749 return NULL;
1750 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001751
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001752 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001753 if (seqlen == 0) {
1754 Py_DECREF(seq);
1755 return PyString_FromString("");
1756 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001758 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001759 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1760 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001761 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001762 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001763 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001765
Raymond Hettinger674f2412004-08-23 23:23:54 +00001766 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001767 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001768 * Do a pre-pass to figure out the total amount of space we'll
1769 * need (sz), see whether any argument is absurd, and defer to
1770 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001771 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001772 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001773 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001774 item = PySequence_Fast_GET_ITEM(seq, i);
1775 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001776#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001777 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001778 /* Defer to Unicode join.
1779 * CAUTION: There's no gurantee that the
1780 * original sequence can be iterated over
1781 * again, so we must pass seq here.
1782 */
1783 PyObject *result;
1784 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001785 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001786 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001787 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001788#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001789 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001790 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001791 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001792 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001793 Py_DECREF(seq);
1794 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001795 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001796 sz += PyString_GET_SIZE(item);
1797 if (i != 0)
1798 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001799 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001800 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001801 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001802 Py_DECREF(seq);
1803 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001805 }
1806
1807 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001808 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001809 if (res == NULL) {
1810 Py_DECREF(seq);
1811 return NULL;
1812 }
1813
1814 /* Catenate everything. */
1815 p = PyString_AS_STRING(res);
1816 for (i = 0; i < seqlen; ++i) {
1817 size_t n;
1818 item = PySequence_Fast_GET_ITEM(seq, i);
1819 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001820 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001821 p += n;
1822 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001823 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001824 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001825 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001827
Jeremy Hylton49048292000-07-11 03:28:17 +00001828 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830}
1831
Tim Peters52e155e2001-06-16 05:42:57 +00001832PyObject *
1833_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001834{
Tim Petersa7259592001-06-16 05:11:17 +00001835 assert(sep != NULL && PyString_Check(sep));
1836 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001837 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001838}
1839
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001840Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001841string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001842{
1843 if (*end > len)
1844 *end = len;
1845 else if (*end < 0)
1846 *end += len;
1847 if (*end < 0)
1848 *end = 0;
1849 if (*start < 0)
1850 *start += len;
1851 if (*start < 0)
1852 *start = 0;
1853}
1854
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001855Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001856string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001859 const char *sub;
1860 Py_ssize_t sub_len;
1861 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001863 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1864 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865 return -2;
1866 if (PyString_Check(subobj)) {
1867 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001868 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001869 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001870#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001872 return PyUnicode_Find(
1873 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001874#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001875 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001876 /* XXX - the "expected a character buffer object" is pretty
1877 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 return -2;
1879
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001880 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001881 return stringlib_find_slice(
1882 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1883 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001884 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001885 return stringlib_rfind_slice(
1886 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1887 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888}
1889
1890
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001891PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892"S.find(sub [,start [,end]]) -> int\n\
1893\n\
1894Return the lowest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001895such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896arguments start and end are interpreted as in slice notation.\n\
1897\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001898Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899
1900static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001901string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001903 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904 if (result == -2)
1905 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001906 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907}
1908
1909
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001910PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911"S.index(sub [,start [,end]]) -> int\n\
1912\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001913Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914
1915static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001916string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001918 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919 if (result == -2)
1920 return NULL;
1921 if (result == -1) {
1922 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001923 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 return NULL;
1925 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001926 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927}
1928
1929
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001930PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931"S.rfind(sub [,start [,end]]) -> int\n\
1932\n\
1933Return the highest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001934such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935arguments start and end are interpreted as in slice notation.\n\
1936\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001937Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938
1939static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001940string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001942 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943 if (result == -2)
1944 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001945 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946}
1947
1948
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001949PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950"S.rindex(sub [,start [,end]]) -> int\n\
1951\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001952Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953
1954static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001955string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001957 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958 if (result == -2)
1959 return NULL;
1960 if (result == -1) {
1961 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001962 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963 return NULL;
1964 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001965 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966}
1967
1968
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001969Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001970do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1971{
1972 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001973 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001974 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001975 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1976 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001977
1978 i = 0;
1979 if (striptype != RIGHTSTRIP) {
1980 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1981 i++;
1982 }
1983 }
1984
1985 j = len;
1986 if (striptype != LEFTSTRIP) {
1987 do {
1988 j--;
1989 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1990 j++;
1991 }
1992
1993 if (i == 0 && j == len && PyString_CheckExact(self)) {
1994 Py_INCREF(self);
1995 return (PyObject*)self;
1996 }
1997 else
1998 return PyString_FromStringAndSize(s+i, j-i);
1999}
2000
2001
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002002Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002003do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004{
2005 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002006 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 i = 0;
2009 if (striptype != RIGHTSTRIP) {
2010 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2011 i++;
2012 }
2013 }
2014
2015 j = len;
2016 if (striptype != LEFTSTRIP) {
2017 do {
2018 j--;
2019 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2020 j++;
2021 }
2022
Tim Peters8fa5dd02001-09-12 02:18:30 +00002023 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024 Py_INCREF(self);
2025 return (PyObject*)self;
2026 }
2027 else
2028 return PyString_FromStringAndSize(s+i, j-i);
2029}
2030
2031
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002032Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002033do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2034{
2035 PyObject *sep = NULL;
2036
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002037 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002038 return NULL;
2039
2040 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002041 if (PyString_Check(sep))
2042 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002043#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002044 else if (PyUnicode_Check(sep)) {
2045 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2046 PyObject *res;
2047 if (uniself==NULL)
2048 return NULL;
2049 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2050 striptype, sep);
2051 Py_DECREF(uniself);
2052 return res;
2053 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002054#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002055 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002056#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002057 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002058#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002059 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002060#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002061 STRIPNAME(striptype));
2062 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002063 }
2064
2065 return do_strip(self, striptype);
2066}
2067
2068
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002069PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002070"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071\n\
2072Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002073whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002074If chars is given and not None, remove characters in chars instead.\n\
2075If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076
2077static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002078string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002080 if (PyTuple_GET_SIZE(args) == 0)
2081 return do_strip(self, BOTHSTRIP); /* Common case */
2082 else
2083 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084}
2085
2086
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002087PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002088"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002090Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002091If chars is given and not None, remove characters in chars instead.\n\
2092If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093
2094static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002095string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002097 if (PyTuple_GET_SIZE(args) == 0)
2098 return do_strip(self, LEFTSTRIP); /* Common case */
2099 else
2100 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101}
2102
2103
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002104PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002105"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002107Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002108If chars is given and not None, remove characters in chars instead.\n\
2109If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110
2111static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002112string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002114 if (PyTuple_GET_SIZE(args) == 0)
2115 return do_strip(self, RIGHTSTRIP); /* Common case */
2116 else
2117 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118}
2119
2120
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002121PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122"S.lower() -> string\n\
2123\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002124Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002126/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2127#ifndef _tolower
2128#define _tolower tolower
2129#endif
2130
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002132string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002134 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002135 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002136 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002138 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002139 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002141
2142 s = PyString_AS_STRING(newobj);
2143
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002144 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002145
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002147 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002148 if (isupper(c))
2149 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002151
Anthony Baxtera6286212006-04-11 07:42:36 +00002152 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153}
2154
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002155PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156"S.upper() -> string\n\
2157\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002158Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002160#ifndef _toupper
2161#define _toupper toupper
2162#endif
2163
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002165string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002167 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002168 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002169 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002171 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002172 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002174
2175 s = PyString_AS_STRING(newobj);
2176
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002177 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002178
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002180 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002181 if (islower(c))
2182 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002184
Anthony Baxtera6286212006-04-11 07:42:36 +00002185 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186}
2187
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002188PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002189"S.title() -> string\n\
2190\n\
2191Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002192characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193
2194static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002195string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196{
2197 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002198 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002200 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201
Anthony Baxtera6286212006-04-11 07:42:36 +00002202 newobj = PyString_FromStringAndSize(NULL, n);
2203 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002205 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206 for (i = 0; i < n; i++) {
2207 int c = Py_CHARMASK(*s++);
2208 if (islower(c)) {
2209 if (!previous_is_cased)
2210 c = toupper(c);
2211 previous_is_cased = 1;
2212 } else if (isupper(c)) {
2213 if (previous_is_cased)
2214 c = tolower(c);
2215 previous_is_cased = 1;
2216 } else
2217 previous_is_cased = 0;
2218 *s_new++ = c;
2219 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002220 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221}
2222
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002223PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224"S.capitalize() -> string\n\
2225\n\
2226Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002227capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228
2229static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002230string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231{
2232 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002233 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002234 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235
Anthony Baxtera6286212006-04-11 07:42:36 +00002236 newobj = PyString_FromStringAndSize(NULL, n);
2237 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002239 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240 if (0 < n) {
2241 int c = Py_CHARMASK(*s++);
2242 if (islower(c))
2243 *s_new = toupper(c);
2244 else
2245 *s_new = c;
2246 s_new++;
2247 }
2248 for (i = 1; i < n; i++) {
2249 int c = Py_CHARMASK(*s++);
2250 if (isupper(c))
2251 *s_new = tolower(c);
2252 else
2253 *s_new = c;
2254 s_new++;
2255 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002256 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257}
2258
2259
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002260PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261"S.count(sub[, start[, end]]) -> int\n\
2262\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002263Return the number of non-overlapping occurrences of substring sub in\n\
2264string S[start:end]. Optional arguments start and end are interpreted\n\
2265as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002266
2267static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002268string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002270 PyObject *sub_obj;
2271 const char *str = PyString_AS_STRING(self), *sub;
2272 Py_ssize_t sub_len;
2273 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002275 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2276 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002278
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002279 if (PyString_Check(sub_obj)) {
2280 sub = PyString_AS_STRING(sub_obj);
2281 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002283#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002284 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002285 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002286 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002287 if (count == -1)
2288 return NULL;
2289 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002290 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002291 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002292#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002293 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 return NULL;
2295
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002296 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002297
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002298 return PyInt_FromSsize_t(
2299 stringlib_count(str + start, end - start, sub, sub_len)
2300 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301}
2302
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002303PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304"S.swapcase() -> string\n\
2305\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002307converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308
2309static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002310string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311{
2312 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002313 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002314 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002315
Anthony Baxtera6286212006-04-11 07:42:36 +00002316 newobj = PyString_FromStringAndSize(NULL, n);
2317 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002319 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320 for (i = 0; i < n; i++) {
2321 int c = Py_CHARMASK(*s++);
2322 if (islower(c)) {
2323 *s_new = toupper(c);
2324 }
2325 else if (isupper(c)) {
2326 *s_new = tolower(c);
2327 }
2328 else
2329 *s_new = c;
2330 s_new++;
2331 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002332 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333}
2334
2335
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002336PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337"S.translate(table [,deletechars]) -> string\n\
2338\n\
2339Return a copy of the string S, where all characters occurring\n\
2340in the optional argument deletechars are removed, and the\n\
2341remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002342translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343
2344static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002345string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002347 register char *input, *output;
2348 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002349 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002352 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353 PyObject *result;
2354 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002357 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360
2361 if (PyString_Check(tableobj)) {
2362 table1 = PyString_AS_STRING(tableobj);
2363 tablen = PyString_GET_SIZE(tableobj);
2364 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002365#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002366 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002367 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 parameter; instead a mapping to None will cause characters
2369 to be deleted. */
2370 if (delobj != NULL) {
2371 PyErr_SetString(PyExc_TypeError,
2372 "deletions are implemented differently for unicode");
2373 return NULL;
2374 }
2375 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2376 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002377#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380
Martin v. Löwis00b61272002-12-12 20:03:19 +00002381 if (tablen != 256) {
2382 PyErr_SetString(PyExc_ValueError,
2383 "translation table must be 256 characters long");
2384 return NULL;
2385 }
2386
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387 if (delobj != NULL) {
2388 if (PyString_Check(delobj)) {
2389 del_table = PyString_AS_STRING(delobj);
2390 dellen = PyString_GET_SIZE(delobj);
2391 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002392#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 else if (PyUnicode_Check(delobj)) {
2394 PyErr_SetString(PyExc_TypeError,
2395 "deletions are implemented differently for unicode");
2396 return NULL;
2397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002398#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002399 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2400 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401 }
2402 else {
2403 del_table = NULL;
2404 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002405 }
2406
2407 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002408 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409 result = PyString_FromStringAndSize((char *)NULL, inlen);
2410 if (result == NULL)
2411 return NULL;
2412 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002413 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414
2415 if (dellen == 0) {
2416 /* If no deletions are required, use faster code */
2417 for (i = inlen; --i >= 0; ) {
2418 c = Py_CHARMASK(*input++);
2419 if (Py_CHARMASK((*output++ = table[c])) != c)
2420 changed = 1;
2421 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002422 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423 return result;
2424 Py_DECREF(result);
2425 Py_INCREF(input_obj);
2426 return input_obj;
2427 }
2428
2429 for (i = 0; i < 256; i++)
2430 trans_table[i] = Py_CHARMASK(table[i]);
2431
2432 for (i = 0; i < dellen; i++)
2433 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2434
2435 for (i = inlen; --i >= 0; ) {
2436 c = Py_CHARMASK(*input++);
2437 if (trans_table[c] != -1)
2438 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2439 continue;
2440 changed = 1;
2441 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002442 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443 Py_DECREF(result);
2444 Py_INCREF(input_obj);
2445 return input_obj;
2446 }
2447 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002448 if (inlen > 0)
2449 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002450 return result;
2451}
2452
2453
/* Direction selectors passed as the `direction' argument of the search
   helpers below.  REVERSE is parenthesized so the expansion stays a
   single operand wherever it is used. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Find the first byte equal to c among the first target_len bytes of
   target.  Evaluates to a char * pointing at it, or NULL when there is
   none (this is memchr() with the pointer casts folded in).  Every
   argument is parenthesized so that expressions can be passed safely. */
#define findchar(target, target_len, c)				\
	((char *)memchr((const void *)(target), (c), (target_len)))
2461
2462/* String ops must return a string. */
2463/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002464Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002465return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002466{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002467 if (PyString_CheckExact(self)) {
2468 Py_INCREF(self);
2469 return self;
2470 }
2471 return (PyStringObject *)PyString_FromStringAndSize(
2472 PyString_AS_STRING(self),
2473 PyString_GET_SIZE(self));
2474}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002476Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002477countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002478{
2479 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002480 const char *start=target;
2481 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002483 while ( (start=findchar(start, end-start, c)) != NULL ) {
2484 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002485 if (count >= maxcount)
2486 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002487 start += 1;
2488 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002489 return count;
2490}
2491
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002492Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002493findstring(const char *target, Py_ssize_t target_len,
2494 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002495 Py_ssize_t start,
2496 Py_ssize_t end,
2497 int direction)
2498{
2499 if (start < 0) {
2500 start += target_len;
2501 if (start < 0)
2502 start = 0;
2503 }
2504 if (end > target_len) {
2505 end = target_len;
2506 } else if (end < 0) {
2507 end += target_len;
2508 if (end < 0)
2509 end = 0;
2510 }
2511
2512 /* zero-length substrings always match at the first attempt */
2513 if (pattern_len == 0)
2514 return (direction > 0) ? start : end;
2515
2516 end -= pattern_len;
2517
2518 if (direction < 0) {
2519 for (; end >= start; end--)
2520 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2521 return end;
2522 } else {
2523 for (; start <= end; start++)
2524 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2525 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526 }
2527 return -1;
2528}
2529
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002530Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002531countstring(const char *target, Py_ssize_t target_len,
2532 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002533 Py_ssize_t start,
2534 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002535 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002536{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002537 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002538
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002539 if (start < 0) {
2540 start += target_len;
2541 if (start < 0)
2542 start = 0;
2543 }
2544 if (end > target_len) {
2545 end = target_len;
2546 } else if (end < 0) {
2547 end += target_len;
2548 if (end < 0)
2549 end = 0;
2550 }
2551
2552 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002553 if (pattern_len == 0 || maxcount == 0) {
2554 if (target_len+1 < maxcount)
2555 return target_len+1;
2556 return maxcount;
2557 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002558
2559 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002560 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002561 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002562 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2563 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002564 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002565 end -= pattern_len-1;
2566 }
2567 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002568 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002569 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2570 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002571 if (--maxcount <= 0)
2572 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002573 start += pattern_len-1;
2574 }
2575 }
2576 return count;
2577}
2578
2579
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002580/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002581
2582/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002583Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002584replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002585 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002586 Py_ssize_t maxcount)
2587{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002588 char *self_s, *result_s;
2589 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002590 Py_ssize_t count, i, product;
2591 PyStringObject *result;
2592
2593 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002594
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002595 /* 1 at the end plus 1 after every character */
2596 count = self_len+1;
2597 if (maxcount < count)
2598 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002599
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002600 /* Check for overflow */
2601 /* result_len = count * to_len + self_len; */
2602 product = count * to_len;
2603 if (product / to_len != count) {
2604 PyErr_SetString(PyExc_OverflowError,
2605 "replace string is too long");
2606 return NULL;
2607 }
2608 result_len = product + self_len;
2609 if (result_len < 0) {
2610 PyErr_SetString(PyExc_OverflowError,
2611 "replace string is too long");
2612 return NULL;
2613 }
2614
2615 if (! (result = (PyStringObject *)
2616 PyString_FromStringAndSize(NULL, result_len)) )
2617 return NULL;
2618
2619 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002620 result_s = PyString_AS_STRING(result);
2621
2622 /* TODO: special case single character, which doesn't need memcpy */
2623
2624 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002625 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002626 result_s += to_len;
2627 count -= 1;
2628
2629 for (i=0; i<count; i++) {
2630 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002631 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002632 result_s += to_len;
2633 }
2634
2635 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002636 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002637
2638 return result;
2639}
2640
2641/* Special case for deleting a single character */
2642/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002643Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002644replace_delete_single_character(PyStringObject *self,
2645 char from_c, Py_ssize_t maxcount)
2646{
2647 char *self_s, *result_s;
2648 char *start, *next, *end;
2649 Py_ssize_t self_len, result_len;
2650 Py_ssize_t count;
2651 PyStringObject *result;
2652
2653 self_len = PyString_GET_SIZE(self);
2654 self_s = PyString_AS_STRING(self);
2655
Andrew Dalke51324072006-05-26 20:25:22 +00002656 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002657 if (count == 0) {
2658 return return_self(self);
2659 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002660
2661 result_len = self_len - count; /* from_len == 1 */
2662 assert(result_len>=0);
2663
2664 if ( (result = (PyStringObject *)
2665 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2666 return NULL;
2667 result_s = PyString_AS_STRING(result);
2668
2669 start = self_s;
2670 end = self_s + self_len;
2671 while (count-- > 0) {
2672 next = findchar(start, end-start, from_c);
2673 if (next == NULL)
2674 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002675 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002676 result_s += (next-start);
2677 start = next+1;
2678 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002679 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002680
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002681 return result;
2682}
2683
2684/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2685
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002686Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002687replace_delete_substring(PyStringObject *self,
2688 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002689 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002690 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002691 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002692 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002693 Py_ssize_t count, offset;
2694 PyStringObject *result;
2695
2696 self_len = PyString_GET_SIZE(self);
2697 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002698
2699 count = countstring(self_s, self_len,
2700 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002701 0, self_len, 1,
2702 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002703
2704 if (count == 0) {
2705 /* no matches */
2706 return return_self(self);
2707 }
2708
2709 result_len = self_len - (count * from_len);
2710 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002711
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002712 if ( (result = (PyStringObject *)
2713 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2714 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002715
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002716 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002717
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002718 start = self_s;
2719 end = self_s + self_len;
2720 while (count-- > 0) {
2721 offset = findstring(start, end-start,
2722 from_s, from_len,
2723 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002724 if (offset == -1)
2725 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002726 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002727
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002728 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002729
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002730 result_s += (next-start);
2731 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002732 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002733 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002734 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002735}
2736
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002737/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002738Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002739replace_single_character_in_place(PyStringObject *self,
2740 char from_c, char to_c,
2741 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002742{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002743 char *self_s, *result_s, *start, *end, *next;
2744 Py_ssize_t self_len;
2745 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002746
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002747 /* The result string will be the same size */
2748 self_s = PyString_AS_STRING(self);
2749 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002750
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002751 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002752
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002753 if (next == NULL) {
2754 /* No matches; return the original string */
2755 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002756 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002757
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002759 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002760 if (result == NULL)
2761 return NULL;
2762 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002763 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002764
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765 /* change everything in-place, starting with this one */
2766 start = result_s + (next-self_s);
2767 *start = to_c;
2768 start++;
2769 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002770
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771 while (--maxcount > 0) {
2772 next = findchar(start, end-start, from_c);
2773 if (next == NULL)
2774 break;
2775 *next = to_c;
2776 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002777 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002778
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002779 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002780}
2781
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002782/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002783Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002784replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002785 const char *from_s, Py_ssize_t from_len,
2786 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002787 Py_ssize_t maxcount)
2788{
2789 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002790 char *self_s;
2791 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002792 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002793
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002794 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002795
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002796 self_s = PyString_AS_STRING(self);
2797 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002798
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002799 offset = findstring(self_s, self_len,
2800 from_s, from_len,
2801 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002802 if (offset == -1) {
2803 /* No matches; return the original string */
2804 return return_self(self);
2805 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002806
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002807 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002808 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002809 if (result == NULL)
2810 return NULL;
2811 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002812 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002813
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814 /* change everything in-place, starting with this one */
2815 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002816 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002817 start += from_len;
2818 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002819
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002820 while ( --maxcount > 0) {
2821 offset = findstring(start, end-start,
2822 from_s, from_len,
2823 0, end-start, FORWARD);
2824 if (offset==-1)
2825 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002826 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 start += offset+from_len;
2828 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002829
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002830 return result;
2831}
2832
2833/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002834Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835replace_single_character(PyStringObject *self,
2836 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002837 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002838 Py_ssize_t maxcount)
2839{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002840 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002841 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002842 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002843 Py_ssize_t count, product;
2844 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002845
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002846 self_s = PyString_AS_STRING(self);
2847 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002848
Andrew Dalke51324072006-05-26 20:25:22 +00002849 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002850 if (count == 0) {
2851 /* no matches, return unchanged */
2852 return return_self(self);
2853 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002854
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002855 /* use the difference between current and new, hence the "-1" */
2856 /* result_len = self_len + count * (to_len-1) */
2857 product = count * (to_len-1);
2858 if (product / (to_len-1) != count) {
2859 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2860 return NULL;
2861 }
2862 result_len = self_len + product;
2863 if (result_len < 0) {
2864 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2865 return NULL;
2866 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002867
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002868 if ( (result = (PyStringObject *)
2869 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2870 return NULL;
2871 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002872
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002873 start = self_s;
2874 end = self_s + self_len;
2875 while (count-- > 0) {
2876 next = findchar(start, end-start, from_c);
2877 if (next == NULL)
2878 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002879
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002880 if (next == start) {
2881 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002882 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002883 result_s += to_len;
2884 start += 1;
2885 } else {
2886 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002887 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002888 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002889 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002890 result_s += to_len;
2891 start = next+1;
2892 }
2893 }
2894 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002895 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002896
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002897 return result;
2898}
2899
2900/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002901Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002902replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002903 const char *from_s, Py_ssize_t from_len,
2904 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002905 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002906 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002907 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002908 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002909 Py_ssize_t count, offset, product;
2910 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002911
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912 self_s = PyString_AS_STRING(self);
2913 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002914
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002915 count = countstring(self_s, self_len,
2916 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002917 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002918 if (count == 0) {
2919 /* no matches, return unchanged */
2920 return return_self(self);
2921 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002922
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923 /* Check for overflow */
2924 /* result_len = self_len + count * (to_len-from_len) */
2925 product = count * (to_len-from_len);
2926 if (product / (to_len-from_len) != count) {
2927 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2928 return NULL;
2929 }
2930 result_len = self_len + product;
2931 if (result_len < 0) {
2932 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2933 return NULL;
2934 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002935
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002936 if ( (result = (PyStringObject *)
2937 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2938 return NULL;
2939 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002940
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002941 start = self_s;
2942 end = self_s + self_len;
2943 while (count-- > 0) {
2944 offset = findstring(start, end-start,
2945 from_s, from_len,
2946 0, end-start, FORWARD);
2947 if (offset == -1)
2948 break;
2949 next = start+offset;
2950 if (next == start) {
2951 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002952 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002953 result_s += to_len;
2954 start += from_len;
2955 } else {
2956 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002957 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002958 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002959 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002960 result_s += to_len;
2961 start = next+from_len;
2962 }
2963 }
2964 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002965 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002966
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002967 return result;
2968}
2969
2970
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002971Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002972replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002973 const char *from_s, Py_ssize_t from_len,
2974 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002975 Py_ssize_t maxcount)
2976{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002977 if (maxcount < 0) {
2978 maxcount = PY_SSIZE_T_MAX;
2979 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2980 /* nothing to do; return the original string */
2981 return return_self(self);
2982 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002983
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002984 if (maxcount == 0 ||
2985 (from_len == 0 && to_len == 0)) {
2986 /* nothing to do; return the original string */
2987 return return_self(self);
2988 }
2989
2990 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002991
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002992 if (from_len == 0) {
2993 /* insert the 'to' string everywhere. */
2994 /* >>> "Python".replace("", ".") */
2995 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002996 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002997 }
2998
2999 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3000 /* point for an empty self string to generate a non-empty string */
3001 /* Special case so the remaining code always gets a non-empty string */
3002 if (PyString_GET_SIZE(self) == 0) {
3003 return return_self(self);
3004 }
3005
3006 if (to_len == 0) {
3007 /* delete all occurances of 'from' string */
3008 if (from_len == 1) {
3009 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003010 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003011 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003012 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003013 }
3014 }
3015
3016 /* Handle special case where both strings have the same length */
3017
3018 if (from_len == to_len) {
3019 if (from_len == 1) {
3020 return replace_single_character_in_place(
3021 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003022 from_s[0],
3023 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003024 maxcount);
3025 } else {
3026 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003027 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003028 }
3029 }
3030
3031 /* Otherwise use the more generic algorithms */
3032 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003033 return replace_single_character(self, from_s[0],
3034 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003035 } else {
3036 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003037 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003038 }
3039}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003041PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003042"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003043\n\
3044Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003045old replaced by new. If the optional argument count is\n\
3046given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003047
3048static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003049string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003050{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003051 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003052 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003053 const char *from_s, *to_s;
3054 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003055
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003056 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003057 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003058
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003059 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003060 from_s = PyString_AS_STRING(from);
3061 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003062 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003063#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003065 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003066 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003067#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003068 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069 return NULL;
3070
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003071 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003072 to_s = PyString_AS_STRING(to);
3073 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003074 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003075#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003076 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003077 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003078 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003079#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003080 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003081 return NULL;
3082
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003083 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003084 from_s, from_len,
3085 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003086}
3087
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003088/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003089
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003090/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003091 * against substr, using the start and end arguments. Returns
3092 * -1 on error, 0 if not found and 1 if found.
3093 */
3094Py_LOCAL(int)
3095_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3096 Py_ssize_t end, int direction)
3097{
3098 Py_ssize_t len = PyString_GET_SIZE(self);
3099 Py_ssize_t slen;
3100 const char* sub;
3101 const char* str;
3102
3103 if (PyString_Check(substr)) {
3104 sub = PyString_AS_STRING(substr);
3105 slen = PyString_GET_SIZE(substr);
3106 }
3107#ifdef Py_USING_UNICODE
3108 else if (PyUnicode_Check(substr))
3109 return PyUnicode_Tailmatch((PyObject *)self,
3110 substr, start, end, direction);
3111#endif
3112 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3113 return -1;
3114 str = PyString_AS_STRING(self);
3115
3116 string_adjust_indices(&start, &end, len);
3117
3118 if (direction < 0) {
3119 /* startswith */
3120 if (start+slen > len)
3121 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003122 } else {
3123 /* endswith */
3124 if (end-start < slen || start > len)
3125 return 0;
3126
3127 if (end-slen > start)
3128 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003129 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003130 if (end-start >= slen)
3131 return ! memcmp(str+start, sub, slen);
3132 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003133}
3134
3135
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003136PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003137"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003138\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003139Return True if S starts with the specified prefix, False otherwise.\n\
3140With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003141With optional end, stop comparing S at that position.\n\
3142prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003143
3144static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003145string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003146{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003147 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003148 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003149 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003150 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003151
Guido van Rossumc6821402000-05-08 14:08:05 +00003152 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3153 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003155 if (PyTuple_Check(subobj)) {
3156 Py_ssize_t i;
3157 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3158 result = _string_tailmatch(self,
3159 PyTuple_GET_ITEM(subobj, i),
3160 start, end, -1);
3161 if (result == -1)
3162 return NULL;
3163 else if (result) {
3164 Py_RETURN_TRUE;
3165 }
3166 }
3167 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003168 }
Georg Brandl24250812006-06-09 18:45:48 +00003169 result = _string_tailmatch(self, subobj, start, end, -1);
3170 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003171 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003172 else
Georg Brandl24250812006-06-09 18:45:48 +00003173 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003174}
3175
3176
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003177PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003178"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003180Return True if S ends with the specified suffix, False otherwise.\n\
3181With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003182With optional end, stop comparing S at that position.\n\
3183suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003184
3185static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003186string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003187{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003188 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003189 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003190 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003191 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003192
Guido van Rossumc6821402000-05-08 14:08:05 +00003193 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3194 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003196 if (PyTuple_Check(subobj)) {
3197 Py_ssize_t i;
3198 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3199 result = _string_tailmatch(self,
3200 PyTuple_GET_ITEM(subobj, i),
3201 start, end, +1);
3202 if (result == -1)
3203 return NULL;
3204 else if (result) {
3205 Py_RETURN_TRUE;
3206 }
3207 }
3208 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003209 }
Georg Brandl24250812006-06-09 18:45:48 +00003210 result = _string_tailmatch(self, subobj, start, end, +1);
3211 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003212 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003213 else
Georg Brandl24250812006-06-09 18:45:48 +00003214 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003215}
3216
3217
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003218PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003219"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003220\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003221Encodes S using the codec registered for encoding. encoding defaults\n\
3222to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003223handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003224a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3225'xmlcharrefreplace' as well as any other name registered with\n\
3226codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003227
3228static PyObject *
3229string_encode(PyStringObject *self, PyObject *args)
3230{
3231 char *encoding = NULL;
3232 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003233 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003234
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003235 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3236 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003237 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003238 if (v == NULL)
3239 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003240 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3241 PyErr_Format(PyExc_TypeError,
3242 "encoder did not return a string/unicode object "
3243 "(type=%.400s)",
3244 v->ob_type->tp_name);
3245 Py_DECREF(v);
3246 return NULL;
3247 }
3248 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003249
3250 onError:
3251 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003252}
3253
3254
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003255PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003256"S.decode([encoding[,errors]]) -> object\n\
3257\n\
3258Decodes S using the codec registered for encoding. encoding defaults\n\
3259to the default encoding. errors may be given to set a different error\n\
3260handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003261a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3262as well as any other name registerd with codecs.register_error that is\n\
3263able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003264
3265static PyObject *
3266string_decode(PyStringObject *self, PyObject *args)
3267{
3268 char *encoding = NULL;
3269 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003270 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003271
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003272 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3273 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003274 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003275 if (v == NULL)
3276 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003277 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3278 PyErr_Format(PyExc_TypeError,
3279 "decoder did not return a string/unicode object "
3280 "(type=%.400s)",
3281 v->ob_type->tp_name);
3282 Py_DECREF(v);
3283 return NULL;
3284 }
3285 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003286
3287 onError:
3288 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003289}
3290
3291
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003292PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003293"S.expandtabs([tabsize]) -> string\n\
3294\n\
3295Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003296If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003297
3298static PyObject*
3299string_expandtabs(PyStringObject *self, PyObject *args)
3300{
3301 const char *e, *p;
3302 char *q;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003303 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003304 PyObject *u;
3305 int tabsize = 8;
3306
3307 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3308 return NULL;
3309
Thomas Wouters7e474022000-07-16 12:04:32 +00003310 /* First pass: determine size of output string */
Neal Norwitz66e64e22007-06-09 04:06:30 +00003311 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003312 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3313 for (p = PyString_AS_STRING(self); p < e; p++)
3314 if (*p == '\t') {
Neal Norwitz66e64e22007-06-09 04:06:30 +00003315 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003316 j += tabsize - (j % tabsize);
Neal Norwitz66e64e22007-06-09 04:06:30 +00003317 if (old_j > j) {
Neal Norwitz8355dd52007-06-11 04:32:41 +00003318 PyErr_SetString(PyExc_OverflowError,
3319 "new string is too long");
Neal Norwitz66e64e22007-06-09 04:06:30 +00003320 return NULL;
3321 }
3322 old_j = j;
3323 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003324 }
3325 else {
3326 j++;
3327 if (*p == '\n' || *p == '\r') {
3328 i += j;
Neal Norwitz8355dd52007-06-11 04:32:41 +00003329 old_j = j = 0;
3330 if (i < 0) {
3331 PyErr_SetString(PyExc_OverflowError,
3332 "new string is too long");
3333 return NULL;
3334 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003335 }
3336 }
3337
Neal Norwitz66e64e22007-06-09 04:06:30 +00003338 if ((i + j) < 0) {
3339 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3340 return NULL;
3341 }
3342
Guido van Rossum4c08d552000-03-10 22:55:18 +00003343 /* Second pass: create output string and fill it */
3344 u = PyString_FromStringAndSize(NULL, i + j);
3345 if (!u)
3346 return NULL;
3347
3348 j = 0;
3349 q = PyString_AS_STRING(u);
3350
3351 for (p = PyString_AS_STRING(self); p < e; p++)
3352 if (*p == '\t') {
3353 if (tabsize > 0) {
3354 i = tabsize - (j % tabsize);
3355 j += i;
3356 while (i--)
3357 *q++ = ' ';
3358 }
3359 }
3360 else {
3361 j++;
3362 *q++ = *p;
3363 if (*p == '\n' || *p == '\r')
3364 j = 0;
3365 }
3366
3367 return u;
3368}
3369
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003370Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003371pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003372{
3373 PyObject *u;
3374
3375 if (left < 0)
3376 left = 0;
3377 if (right < 0)
3378 right = 0;
3379
Tim Peters8fa5dd02001-09-12 02:18:30 +00003380 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003381 Py_INCREF(self);
3382 return (PyObject *)self;
3383 }
3384
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003385 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003386 left + PyString_GET_SIZE(self) + right);
3387 if (u) {
3388 if (left)
3389 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003390 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003391 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003392 PyString_GET_SIZE(self));
3393 if (right)
3394 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3395 fill, right);
3396 }
3397
3398 return u;
3399}
3400
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003401PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003402"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003403"\n"
3404"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003405"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406
3407static PyObject *
3408string_ljust(PyStringObject *self, PyObject *args)
3409{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003410 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003411 char fillchar = ' ';
3412
Thomas Wouters4abb3662006-04-19 14:50:15 +00003413 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414 return NULL;
3415
Tim Peters8fa5dd02001-09-12 02:18:30 +00003416 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003417 Py_INCREF(self);
3418 return (PyObject*) self;
3419 }
3420
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003421 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422}
3423
3424
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003425PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003426"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003427"\n"
3428"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430
3431static PyObject *
3432string_rjust(PyStringObject *self, PyObject *args)
3433{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003434 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003435 char fillchar = ' ';
3436
Thomas Wouters4abb3662006-04-19 14:50:15 +00003437 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438 return NULL;
3439
Tim Peters8fa5dd02001-09-12 02:18:30 +00003440 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003441 Py_INCREF(self);
3442 return (PyObject*) self;
3443 }
3444
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003445 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003446}
3447
3448
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003449PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003450"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003451"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003452"Return S centered in a string of length width. Padding is\n"
3453"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003454
3455static PyObject *
3456string_center(PyStringObject *self, PyObject *args)
3457{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003458 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003459 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003460 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461
Thomas Wouters4abb3662006-04-19 14:50:15 +00003462 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003463 return NULL;
3464
Tim Peters8fa5dd02001-09-12 02:18:30 +00003465 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466 Py_INCREF(self);
3467 return (PyObject*) self;
3468 }
3469
3470 marg = width - PyString_GET_SIZE(self);
3471 left = marg / 2 + (marg & width & 1);
3472
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003473 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474}
3475
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003476PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003477"S.zfill(width) -> string\n"
3478"\n"
3479"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003480"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003481
3482static PyObject *
3483string_zfill(PyStringObject *self, PyObject *args)
3484{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003485 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003486 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003487 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003488 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003489
Thomas Wouters4abb3662006-04-19 14:50:15 +00003490 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003491 return NULL;
3492
3493 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003494 if (PyString_CheckExact(self)) {
3495 Py_INCREF(self);
3496 return (PyObject*) self;
3497 }
3498 else
3499 return PyString_FromStringAndSize(
3500 PyString_AS_STRING(self),
3501 PyString_GET_SIZE(self)
3502 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003503 }
3504
3505 fill = width - PyString_GET_SIZE(self);
3506
3507 s = pad(self, fill, 0, '0');
3508
3509 if (s == NULL)
3510 return NULL;
3511
3512 p = PyString_AS_STRING(s);
3513 if (p[fill] == '+' || p[fill] == '-') {
3514 /* move sign to beginning of string */
3515 p[0] = p[fill];
3516 p[fill] = '0';
3517 }
3518
3519 return (PyObject*) s;
3520}
3521
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003522PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003523"S.isspace() -> bool\n\
3524\n\
3525Return True if all characters in S are whitespace\n\
3526and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003527
3528static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003529string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003530{
Fred Drakeba096332000-07-09 07:04:36 +00003531 register const unsigned char *p
3532 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003533 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534
Guido van Rossum4c08d552000-03-10 22:55:18 +00003535 /* Shortcut for single character strings */
3536 if (PyString_GET_SIZE(self) == 1 &&
3537 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003539
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003540 /* Special case for empty strings */
3541 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003542 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003543
Guido van Rossum4c08d552000-03-10 22:55:18 +00003544 e = p + PyString_GET_SIZE(self);
3545 for (; p < e; p++) {
3546 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003547 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003548 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003550}
3551
3552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003553PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003554"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003555\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003556Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003557and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558
3559static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003560string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561{
Fred Drakeba096332000-07-09 07:04:36 +00003562 register const unsigned char *p
3563 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003564 register const unsigned char *e;
3565
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003566 /* Shortcut for single character strings */
3567 if (PyString_GET_SIZE(self) == 1 &&
3568 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570
3571 /* Special case for empty strings */
3572 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003573 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574
3575 e = p + PyString_GET_SIZE(self);
3576 for (; p < e; p++) {
3577 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003578 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581}
3582
3583
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003584PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003586\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003587Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003588and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589
3590static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003591string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592{
Fred Drakeba096332000-07-09 07:04:36 +00003593 register const unsigned char *p
3594 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003595 register const unsigned char *e;
3596
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597 /* Shortcut for single character strings */
3598 if (PyString_GET_SIZE(self) == 1 &&
3599 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003601
3602 /* Special case for empty strings */
3603 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003604 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605
3606 e = p + PyString_GET_SIZE(self);
3607 for (; p < e; p++) {
3608 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003609 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003610 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003612}
3613
3614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003615PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003618Return True if all characters in S are digits\n\
3619and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620
3621static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003622string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623{
Fred Drakeba096332000-07-09 07:04:36 +00003624 register const unsigned char *p
3625 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003626 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628 /* Shortcut for single character strings */
3629 if (PyString_GET_SIZE(self) == 1 &&
3630 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003633 /* Special case for empty strings */
3634 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003635 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003636
Guido van Rossum4c08d552000-03-10 22:55:18 +00003637 e = p + PyString_GET_SIZE(self);
3638 for (; p < e; p++) {
3639 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003640 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643}
3644
3645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003646PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003650at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651
3652static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003653string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654{
Fred Drakeba096332000-07-09 07:04:36 +00003655 register const unsigned char *p
3656 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003657 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658 int cased;
3659
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660 /* Shortcut for single character strings */
3661 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003662 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003664 /* Special case for empty strings */
3665 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003666 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003667
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 e = p + PyString_GET_SIZE(self);
3669 cased = 0;
3670 for (; p < e; p++) {
3671 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673 else if (!cased && islower(*p))
3674 cased = 1;
3675 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677}
3678
3679
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003680PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003683Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003684at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685
3686static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003687string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688{
Fred Drakeba096332000-07-09 07:04:36 +00003689 register const unsigned char *p
3690 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003691 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692 int cased;
3693
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694 /* Shortcut for single character strings */
3695 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003696 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003698 /* Special case for empty strings */
3699 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003700 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003701
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 e = p + PyString_GET_SIZE(self);
3703 cased = 0;
3704 for (; p < e; p++) {
3705 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707 else if (!cased && isupper(*p))
3708 cased = 1;
3709 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711}
3712
3713
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003714PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003715"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003717Return True if S is a titlecased string and there is at least one\n\
3718character in S, i.e. uppercase characters may only follow uncased\n\
3719characters and lowercase characters only cased ones. Return False\n\
3720otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721
3722static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003723string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724{
Fred Drakeba096332000-07-09 07:04:36 +00003725 register const unsigned char *p
3726 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003727 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728 int cased, previous_is_cased;
3729
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730 /* Shortcut for single character strings */
3731 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003732 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003734 /* Special case for empty strings */
3735 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003737
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738 e = p + PyString_GET_SIZE(self);
3739 cased = 0;
3740 previous_is_cased = 0;
3741 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003742 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743
3744 if (isupper(ch)) {
3745 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003746 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 previous_is_cased = 1;
3748 cased = 1;
3749 }
3750 else if (islower(ch)) {
3751 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003752 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753 previous_is_cased = 1;
3754 cased = 1;
3755 }
3756 else
3757 previous_is_cased = 0;
3758 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760}
3761
3762
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003763PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003764"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765\n\
3766Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003767Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003768is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770static PyObject*
3771string_splitlines(PyStringObject *self, PyObject *args)
3772{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003773 register Py_ssize_t i;
3774 register Py_ssize_t j;
3775 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003776 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 PyObject *list;
3778 PyObject *str;
3779 char *data;
3780
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003781 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782 return NULL;
3783
3784 data = PyString_AS_STRING(self);
3785 len = PyString_GET_SIZE(self);
3786
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003787 /* This does not use the preallocated list because splitlines is
3788 usually run with hundreds of newlines. The overhead of
3789 switching between PyList_SET_ITEM and append causes about a
3790 2-3% slowdown for that common case. A smarter implementation
3791 could move the if check out, so the SET_ITEMs are done first
3792 and the appends only done when the prealloc buffer is full.
3793 That's too much work for little gain.*/
3794
Guido van Rossum4c08d552000-03-10 22:55:18 +00003795 list = PyList_New(0);
3796 if (!list)
3797 goto onError;
3798
3799 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003800 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003801
Guido van Rossum4c08d552000-03-10 22:55:18 +00003802 /* Find a line and append it */
3803 while (i < len && data[i] != '\n' && data[i] != '\r')
3804 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805
3806 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003807 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808 if (i < len) {
3809 if (data[i] == '\r' && i + 1 < len &&
3810 data[i+1] == '\n')
3811 i += 2;
3812 else
3813 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003814 if (keepends)
3815 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003816 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003817 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003818 j = i;
3819 }
3820 if (j < len) {
3821 SPLIT_APPEND(data, j, len);
3822 }
3823
3824 return list;
3825
3826 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003827 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003828 return NULL;
3829}
3830
3831#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003832#undef SPLIT_ADD
3833#undef MAX_PREALLOC
3834#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003835
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003836static PyObject *
3837string_getnewargs(PyStringObject *v)
3838{
3839 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3840}
3841
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003842
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003843static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003844string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003845 /* Counterparts of the obsolete stropmodule functions; except
3846 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003847 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3848 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003849 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003850 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3851 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003852 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3853 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3854 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3855 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3856 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3857 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3858 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003859 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3860 capitalize__doc__},
3861 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3862 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3863 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003864 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003865 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3866 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3867 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3868 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3869 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3870 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3871 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003872 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3873 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003874 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3875 startswith__doc__},
3876 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3877 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3878 swapcase__doc__},
3879 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3880 translate__doc__},
3881 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3882 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3883 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3884 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3885 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3886 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3887 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3888 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3889 expandtabs__doc__},
3890 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3891 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003892 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003893 {NULL, NULL} /* sentinel */
3894};
3895
Jeremy Hylton938ace62002-07-17 16:30:39 +00003896static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003897str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3898
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003899static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003900string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003901{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003902 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003903 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003904
Guido van Rossumae960af2001-08-30 03:11:59 +00003905 if (type != &PyString_Type)
3906 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003907 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3908 return NULL;
3909 if (x == NULL)
3910 return PyString_FromString("");
3911 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003912}
3913
Guido van Rossumae960af2001-08-30 03:11:59 +00003914static PyObject *
3915str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3916{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003917 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003918 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003919
3920 assert(PyType_IsSubtype(type, &PyString_Type));
3921 tmp = string_new(&PyString_Type, args, kwds);
3922 if (tmp == NULL)
3923 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003924 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003925 n = PyString_GET_SIZE(tmp);
3926 pnew = type->tp_alloc(type, n);
3927 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003928 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003929 ((PyStringObject *)pnew)->ob_shash =
3930 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003931 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003932 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003933 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003934 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003935}
3936
Guido van Rossumcacfc072002-05-24 19:01:59 +00003937static PyObject *
3938basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3939{
3940 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003941 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003942 return NULL;
3943}
3944
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003945static PyObject *
3946string_mod(PyObject *v, PyObject *w)
3947{
3948 if (!PyString_Check(v)) {
3949 Py_INCREF(Py_NotImplemented);
3950 return Py_NotImplemented;
3951 }
3952 return PyString_Format(v, w);
3953}
3954
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003955PyDoc_STRVAR(basestring_doc,
3956"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003957
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003958static PyNumberMethods string_as_number = {
3959 0, /*nb_add*/
3960 0, /*nb_subtract*/
3961 0, /*nb_multiply*/
3962 0, /*nb_divide*/
3963 string_mod, /*nb_remainder*/
3964};
3965
3966
Guido van Rossumcacfc072002-05-24 19:01:59 +00003967PyTypeObject PyBaseString_Type = {
3968 PyObject_HEAD_INIT(&PyType_Type)
3969 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003970 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003971 0,
3972 0,
3973 0, /* tp_dealloc */
3974 0, /* tp_print */
3975 0, /* tp_getattr */
3976 0, /* tp_setattr */
3977 0, /* tp_compare */
3978 0, /* tp_repr */
3979 0, /* tp_as_number */
3980 0, /* tp_as_sequence */
3981 0, /* tp_as_mapping */
3982 0, /* tp_hash */
3983 0, /* tp_call */
3984 0, /* tp_str */
3985 0, /* tp_getattro */
3986 0, /* tp_setattro */
3987 0, /* tp_as_buffer */
3988 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3989 basestring_doc, /* tp_doc */
3990 0, /* tp_traverse */
3991 0, /* tp_clear */
3992 0, /* tp_richcompare */
3993 0, /* tp_weaklistoffset */
3994 0, /* tp_iter */
3995 0, /* tp_iternext */
3996 0, /* tp_methods */
3997 0, /* tp_members */
3998 0, /* tp_getset */
3999 &PyBaseObject_Type, /* tp_base */
4000 0, /* tp_dict */
4001 0, /* tp_descr_get */
4002 0, /* tp_descr_set */
4003 0, /* tp_dictoffset */
4004 0, /* tp_init */
4005 0, /* tp_alloc */
4006 basestring_new, /* tp_new */
4007 0, /* tp_free */
4008};
4009
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004010PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011"str(object) -> string\n\
4012\n\
4013Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004014If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004015
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004016PyTypeObject PyString_Type = {
4017 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004018 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004019 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004020 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004021 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004022 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004023 (printfunc)string_print, /* tp_print */
4024 0, /* tp_getattr */
4025 0, /* tp_setattr */
4026 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004027 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004028 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004029 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004030 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004031 (hashfunc)string_hash, /* tp_hash */
4032 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004033 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004034 PyObject_GenericGetAttr, /* tp_getattro */
4035 0, /* tp_setattro */
4036 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004037 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004038 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004039 string_doc, /* tp_doc */
4040 0, /* tp_traverse */
4041 0, /* tp_clear */
4042 (richcmpfunc)string_richcompare, /* tp_richcompare */
4043 0, /* tp_weaklistoffset */
4044 0, /* tp_iter */
4045 0, /* tp_iternext */
4046 string_methods, /* tp_methods */
4047 0, /* tp_members */
4048 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004049 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004050 0, /* tp_dict */
4051 0, /* tp_descr_get */
4052 0, /* tp_descr_set */
4053 0, /* tp_dictoffset */
4054 0, /* tp_init */
4055 0, /* tp_alloc */
4056 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004057 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004058};
4059
4060void
Fred Drakeba096332000-07-09 07:04:36 +00004061PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004062{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004063 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004064 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004065 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004066 if (w == NULL || !PyString_Check(*pv)) {
4067 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004068 *pv = NULL;
4069 return;
4070 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 v = string_concat((PyStringObject *) *pv, w);
4072 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004073 *pv = v;
4074}
4075
Guido van Rossum013142a1994-08-30 08:19:36 +00004076void
Fred Drakeba096332000-07-09 07:04:36 +00004077PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004078{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 PyString_Concat(pv, w);
4080 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004081}
4082
4083
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004084/* The following function breaks the notion that strings are immutable:
4085 it changes the size of a string. We get away with this only if there
4086 is only one module referencing the object. You can also think of it
4087 as creating a new string object and destroying the old one, only
4088 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004089 already be known to some other part of the code...
4090 Note that if there's not enough memory to resize the string, the original
4091 string object at *pv is deallocated, *pv is set to NULL, an "out of
4092 memory" exception is set, and -1 is returned. Else (on success) 0 is
4093 returned, and the value in *pv may or may not be the same as on input.
4094 As always, an extra byte is allocated for a trailing \0 byte (newsize
4095 does *not* include that), and a trailing \0 byte is stored.
4096*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004097
4098int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004099_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004100{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004101 register PyObject *v;
4102 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004103 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004104 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4105 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004106 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004107 Py_DECREF(v);
4108 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004109 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004110 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004111 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004112 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004113 _Py_ForgetReference(v);
4114 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004115 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004116 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004117 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004118 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004119 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004120 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004121 _Py_NewReference(*pv);
4122 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004123 sv->ob_size = newsize;
4124 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004125 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004126 return 0;
4127}
Guido van Rossume5372401993-03-16 12:15:04 +00004128
4129/* Helpers for formatstring */
4130
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004131Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004132getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004133{
Thomas Wouters977485d2006-02-16 15:59:12 +00004134 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004135 if (argidx < arglen) {
4136 (*p_argidx)++;
4137 if (arglen < 0)
4138 return args;
4139 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004140 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004141 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004142 PyErr_SetString(PyExc_TypeError,
4143 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004144 return NULL;
4145}
4146
/* Bit flags recording which flag characters appeared in a conversion
 * specification:
 *
 *   flag      character
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4159
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004160Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004161formatfloat(char *buf, size_t buflen, int flags,
4162 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004163{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004164 /* fmt = '%#.' + `prec` + `type`
4165 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004166 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004167 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004168 x = PyFloat_AsDouble(v);
4169 if (x == -1.0 && PyErr_Occurred()) {
4170 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004171 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004172 }
Guido van Rossume5372401993-03-16 12:15:04 +00004173 if (prec < 0)
4174 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004175 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4176 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004177 /* Worst case length calc to ensure no buffer overrun:
4178
4179 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004180 fmt = %#.<prec>g
4181 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004182 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004183 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004184
4185 'f' formats:
4186 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4187 len = 1 + 50 + 1 + prec = 52 + prec
4188
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004189 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004190 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004191
4192 */
Georg Brandlc5db9232007-07-12 08:38:04 +00004193 if (((type == 'g' || type == 'G') &&
4194 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004195 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004196 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004197 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004198 return -1;
4199 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004200 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4201 (flags&F_ALT) ? "#" : "",
4202 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004203 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004204 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004205}
4206
Tim Peters38fd5b62000-09-21 05:43:11 +00004207/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4208 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4209 * Python's regular ints.
4210 * Return value: a new PyString*, or NULL if error.
4211 * . *pbuf is set to point into it,
4212 * *plen set to the # of chars following that.
4213 * Caller must decref it when done using pbuf.
4214 * The string starting at *pbuf is of the form
4215 * "-"? ("0x" | "0X")? digit+
4216 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004217 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004218 * There will be at least prec digits, zero-filled on the left if
4219 * necessary to get that many.
4220 * val object to be converted
4221 * flags bitmask of format flags; only F_ALT is looked at
4222 * prec minimum number of digits; 0-fill on left if needed
4223 * type a character in [duoxX]; u acts the same as d
4224 *
4225 * CAUTION: o, x and X conversions on regular ints can never
4226 * produce a '-' sign, but can for Python's unbounded ints.
4227 */
4228PyObject*
4229_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4230 char **pbuf, int *plen)
4231{
4232 PyObject *result = NULL;
4233 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004234 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004235 int sign; /* 1 if '-', else 0 */
4236 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004237 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004238 int numdigits; /* len == numnondigits + numdigits */
4239 int numnondigits = 0;
4240
4241 switch (type) {
4242 case 'd':
4243 case 'u':
4244 result = val->ob_type->tp_str(val);
4245 break;
4246 case 'o':
4247 result = val->ob_type->tp_as_number->nb_oct(val);
4248 break;
4249 case 'x':
4250 case 'X':
4251 numnondigits = 2;
4252 result = val->ob_type->tp_as_number->nb_hex(val);
4253 break;
4254 default:
4255 assert(!"'type' not in [duoxX]");
4256 }
4257 if (!result)
4258 return NULL;
4259
Neal Norwitz56423e52006-08-13 18:11:08 +00004260 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004261 if (!buf) {
4262 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004263 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004264 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004265
Tim Peters38fd5b62000-09-21 05:43:11 +00004266 /* To modify the string in-place, there can only be one reference. */
4267 if (result->ob_refcnt != 1) {
4268 PyErr_BadInternalCall();
4269 return NULL;
4270 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004271 llen = PyString_Size(result);
Armin Rigo4b63c212006-10-04 11:44:06 +00004272 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004273 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4274 return NULL;
4275 }
4276 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004277 if (buf[len-1] == 'L') {
4278 --len;
4279 buf[len] = '\0';
4280 }
4281 sign = buf[0] == '-';
4282 numnondigits += sign;
4283 numdigits = len - numnondigits;
4284 assert(numdigits > 0);
4285
Tim Petersfff53252001-04-12 18:38:48 +00004286 /* Get rid of base marker unless F_ALT */
4287 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004288 /* Need to skip 0x, 0X or 0. */
4289 int skipped = 0;
4290 switch (type) {
4291 case 'o':
4292 assert(buf[sign] == '0');
4293 /* If 0 is only digit, leave it alone. */
4294 if (numdigits > 1) {
4295 skipped = 1;
4296 --numdigits;
4297 }
4298 break;
4299 case 'x':
4300 case 'X':
4301 assert(buf[sign] == '0');
4302 assert(buf[sign + 1] == 'x');
4303 skipped = 2;
4304 numnondigits -= 2;
4305 break;
4306 }
4307 if (skipped) {
4308 buf += skipped;
4309 len -= skipped;
4310 if (sign)
4311 buf[0] = '-';
4312 }
4313 assert(len == numnondigits + numdigits);
4314 assert(numdigits > 0);
4315 }
4316
4317 /* Fill with leading zeroes to meet minimum width. */
4318 if (prec > numdigits) {
4319 PyObject *r1 = PyString_FromStringAndSize(NULL,
4320 numnondigits + prec);
4321 char *b1;
4322 if (!r1) {
4323 Py_DECREF(result);
4324 return NULL;
4325 }
4326 b1 = PyString_AS_STRING(r1);
4327 for (i = 0; i < numnondigits; ++i)
4328 *b1++ = *buf++;
4329 for (i = 0; i < prec - numdigits; i++)
4330 *b1++ = '0';
4331 for (i = 0; i < numdigits; i++)
4332 *b1++ = *buf++;
4333 *b1 = '\0';
4334 Py_DECREF(result);
4335 result = r1;
4336 buf = PyString_AS_STRING(result);
4337 len = numnondigits + prec;
4338 }
4339
4340 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004341 if (type == 'X') {
4342 /* Need to convert all lower case letters to upper case.
4343 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004344 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004345 if (buf[i] >= 'a' && buf[i] <= 'x')
4346 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004347 }
4348 *pbuf = buf;
4349 *plen = len;
4350 return result;
4351}
4352
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004353Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004354formatint(char *buf, size_t buflen, int flags,
4355 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004356{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004357 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004358 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4359 + 1 + 1 = 24 */
4360 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004361 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004362 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004363
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004364 x = PyInt_AsLong(v);
4365 if (x == -1 && PyErr_Occurred()) {
4366 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004367 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004368 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004369 if (x < 0 && type == 'u') {
4370 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004371 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004372 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4373 sign = "-";
4374 else
4375 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004376 if (prec < 0)
4377 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004378
4379 if ((flags & F_ALT) &&
4380 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004381 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004382 * of issues that cause pain:
4383 * - when 0 is being converted, the C standard leaves off
4384 * the '0x' or '0X', which is inconsistent with other
4385 * %#x/%#X conversions and inconsistent with Python's
4386 * hex() function
4387 * - there are platforms that violate the standard and
4388 * convert 0 with the '0x' or '0X'
4389 * (Metrowerks, Compaq Tru64)
4390 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004391 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004392 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004393 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004394 * We can achieve the desired consistency by inserting our
4395 * own '0x' or '0X' prefix, and substituting %x/%X in place
4396 * of %#x/%#X.
4397 *
4398 * Note that this is the same approach as used in
4399 * formatint() in unicodeobject.c
4400 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004401 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4402 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004403 }
4404 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004405 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4406 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004407 prec, type);
4408 }
4409
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004410 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4411 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004412 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004413 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004414 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004415 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004416 return -1;
4417 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004418 if (sign[0])
4419 PyOS_snprintf(buf, buflen, fmt, -x);
4420 else
4421 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004422 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004423}
4424
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004425Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004426formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004427{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004428 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004429 if (PyString_Check(v)) {
4430 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004431 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004432 }
4433 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004434 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004435 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004436 }
4437 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004438 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004439}
4440
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004450
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004451PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004452PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004453{
4454 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004455 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004456 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004457 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004458 PyObject *result, *orig_args;
4459#ifdef Py_USING_UNICODE
4460 PyObject *v, *w;
4461#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004462 PyObject *dict = NULL;
4463 if (format == NULL || !PyString_Check(format) || args == NULL) {
4464 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004465 return NULL;
4466 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004467 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004468 fmt = PyString_AS_STRING(format);
4469 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004470 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004471 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004472 if (result == NULL)
4473 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004474 res = PyString_AsString(result);
4475 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004476 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004477 argidx = 0;
4478 }
4479 else {
4480 arglen = -1;
4481 argidx = -2;
4482 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004483 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4484 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004485 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004486 while (--fmtcnt >= 0) {
4487 if (*fmt != '%') {
4488 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004489 rescnt = fmtcnt + 100;
4490 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004491 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004492 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004493 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004494 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004495 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004496 }
4497 *res++ = *fmt++;
4498 }
4499 else {
4500 /* Got a format specifier */
4501 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004502 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004503 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004504 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004505 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004506 PyObject *v = NULL;
4507 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004508 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004509 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004510 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004511 char formatbuf[FORMATBUFLEN];
4512 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004513#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004514 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004515 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004516#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004517
Guido van Rossumda9c2711996-12-05 21:58:58 +00004518 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004519 if (*fmt == '(') {
4520 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004521 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004522 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004523 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004524
4525 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004526 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004527 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004528 goto error;
4529 }
4530 ++fmt;
4531 --fmtcnt;
4532 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004533 /* Skip over balanced parentheses */
4534 while (pcount > 0 && --fmtcnt >= 0) {
4535 if (*fmt == ')')
4536 --pcount;
4537 else if (*fmt == '(')
4538 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004539 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004540 }
4541 keylen = fmt - keystart - 1;
4542 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004543 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004544 "incomplete format key");
4545 goto error;
4546 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004547 key = PyString_FromStringAndSize(keystart,
4548 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004549 if (key == NULL)
4550 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004551 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004552 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004553 args_owned = 0;
4554 }
4555 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004556 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004557 if (args == NULL) {
4558 goto error;
4559 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004560 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004561 arglen = -1;
4562 argidx = -2;
4563 }
Guido van Rossume5372401993-03-16 12:15:04 +00004564 while (--fmtcnt >= 0) {
4565 switch (c = *fmt++) {
4566 case '-': flags |= F_LJUST; continue;
4567 case '+': flags |= F_SIGN; continue;
4568 case ' ': flags |= F_BLANK; continue;
4569 case '#': flags |= F_ALT; continue;
4570 case '0': flags |= F_ZERO; continue;
4571 }
4572 break;
4573 }
4574 if (c == '*') {
4575 v = getnextarg(args, arglen, &argidx);
4576 if (v == NULL)
4577 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004578 if (!PyInt_Check(v)) {
4579 PyErr_SetString(PyExc_TypeError,
4580 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004581 goto error;
4582 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004583 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004584 if (width < 0) {
4585 flags |= F_LJUST;
4586 width = -width;
4587 }
Guido van Rossume5372401993-03-16 12:15:04 +00004588 if (--fmtcnt >= 0)
4589 c = *fmt++;
4590 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004591 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004592 width = c - '0';
4593 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004594 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004595 if (!isdigit(c))
4596 break;
4597 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004598 PyErr_SetString(
4599 PyExc_ValueError,
4600 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004601 goto error;
4602 }
4603 width = width*10 + (c - '0');
4604 }
4605 }
4606 if (c == '.') {
4607 prec = 0;
4608 if (--fmtcnt >= 0)
4609 c = *fmt++;
4610 if (c == '*') {
4611 v = getnextarg(args, arglen, &argidx);
4612 if (v == NULL)
4613 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004614 if (!PyInt_Check(v)) {
4615 PyErr_SetString(
4616 PyExc_TypeError,
4617 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004618 goto error;
4619 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004620 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004621 if (prec < 0)
4622 prec = 0;
4623 if (--fmtcnt >= 0)
4624 c = *fmt++;
4625 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004626 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004627 prec = c - '0';
4628 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004629 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004630 if (!isdigit(c))
4631 break;
4632 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 PyErr_SetString(
4634 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004635 "prec too big");
4636 goto error;
4637 }
4638 prec = prec*10 + (c - '0');
4639 }
4640 }
4641 } /* prec */
4642 if (fmtcnt >= 0) {
4643 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004644 if (--fmtcnt >= 0)
4645 c = *fmt++;
4646 }
4647 }
4648 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004649 PyErr_SetString(PyExc_ValueError,
4650 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004651 goto error;
4652 }
4653 if (c != '%') {
4654 v = getnextarg(args, arglen, &argidx);
4655 if (v == NULL)
4656 goto error;
4657 }
4658 sign = 0;
4659 fill = ' ';
4660 switch (c) {
4661 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004662 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004663 len = 1;
4664 break;
4665 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004666#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004667 if (PyUnicode_Check(v)) {
4668 fmt = fmt_start;
4669 argidx = argidx_start;
4670 goto unicode;
4671 }
Georg Brandld45014b2005-10-01 17:06:00 +00004672#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004673 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004674#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004675 if (temp != NULL && PyUnicode_Check(temp)) {
4676 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004677 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004678 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004679 goto unicode;
4680 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004681#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004682 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004683 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004684 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004685 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004686 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004687 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004688 if (!PyString_Check(temp)) {
4689 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004690 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004691 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004692 goto error;
4693 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004694 pbuf = PyString_AS_STRING(temp);
4695 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004696 if (prec >= 0 && len > prec)
4697 len = prec;
4698 break;
4699 case 'i':
4700 case 'd':
4701 case 'u':
4702 case 'o':
4703 case 'x':
4704 case 'X':
4705 if (c == 'i')
4706 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004707 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004708 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004709 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004710 prec, c, &pbuf, &ilen);
4711 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004712 if (!temp)
4713 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004714 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004715 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004716 else {
4717 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004718 len = formatint(pbuf,
4719 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004720 flags, prec, c, v);
4721 if (len < 0)
4722 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004723 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004724 }
4725 if (flags & F_ZERO)
4726 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004727 break;
4728 case 'e':
4729 case 'E':
4730 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004731 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004732 case 'g':
4733 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004734 if (c == 'F')
4735 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004736 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004737 len = formatfloat(pbuf, sizeof(formatbuf),
4738 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004739 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004740 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004741 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004742 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004743 fill = '0';
4744 break;
4745 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004746#ifdef Py_USING_UNICODE
4747 if (PyUnicode_Check(v)) {
4748 fmt = fmt_start;
4749 argidx = argidx_start;
4750 goto unicode;
4751 }
4752#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004753 pbuf = formatbuf;
4754 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004755 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004756 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004757 break;
4758 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004759 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004760 "unsupported format character '%c' (0x%x) "
Armin Rigo4b63c212006-10-04 11:44:06 +00004761 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004762 c, c,
Armin Rigo4b63c212006-10-04 11:44:06 +00004763 (Py_ssize_t)(fmt - 1 -
4764 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004765 goto error;
4766 }
4767 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004768 if (*pbuf == '-' || *pbuf == '+') {
4769 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004770 len--;
4771 }
4772 else if (flags & F_SIGN)
4773 sign = '+';
4774 else if (flags & F_BLANK)
4775 sign = ' ';
4776 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004777 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004778 }
4779 if (width < len)
4780 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004781 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004782 reslen -= rescnt;
4783 rescnt = width + fmtcnt + 100;
4784 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004785 if (reslen < 0) {
4786 Py_DECREF(result);
Georg Brandl5f795862007-02-26 13:51:34 +00004787 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004788 return PyErr_NoMemory();
4789 }
Georg Brandl5f795862007-02-26 13:51:34 +00004790 if (_PyString_Resize(&result, reslen) < 0) {
4791 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004792 return NULL;
Georg Brandl5f795862007-02-26 13:51:34 +00004793 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004794 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004795 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004796 }
4797 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004798 if (fill != ' ')
4799 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004800 rescnt--;
4801 if (width > len)
4802 width--;
4803 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004804 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4805 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004806 assert(pbuf[1] == c);
4807 if (fill != ' ') {
4808 *res++ = *pbuf++;
4809 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004810 }
Tim Petersfff53252001-04-12 18:38:48 +00004811 rescnt -= 2;
4812 width -= 2;
4813 if (width < 0)
4814 width = 0;
4815 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004816 }
4817 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004818 do {
4819 --rescnt;
4820 *res++ = fill;
4821 } while (--width > len);
4822 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004823 if (fill == ' ') {
4824 if (sign)
4825 *res++ = sign;
4826 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004827 (c == 'x' || c == 'X')) {
4828 assert(pbuf[0] == '0');
4829 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004830 *res++ = *pbuf++;
4831 *res++ = *pbuf++;
4832 }
4833 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004834 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004835 res += len;
4836 rescnt -= len;
4837 while (--width >= len) {
4838 --rescnt;
4839 *res++ = ' ';
4840 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004841 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004842 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004843 "not all arguments converted during string formatting");
Georg Brandl5f795862007-02-26 13:51:34 +00004844 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004845 goto error;
4846 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004847 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004848 } /* '%' */
4849 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004850 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004851 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004852 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004853 goto error;
4854 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004855 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004856 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004857 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004858 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004859 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004860
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004861#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004862 unicode:
4863 if (args_owned) {
4864 Py_DECREF(args);
4865 args_owned = 0;
4866 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004867 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004868 if (PyTuple_Check(orig_args) && argidx > 0) {
4869 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004870 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004871 v = PyTuple_New(n);
4872 if (v == NULL)
4873 goto error;
4874 while (--n >= 0) {
4875 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4876 Py_INCREF(w);
4877 PyTuple_SET_ITEM(v, n, w);
4878 }
4879 args = v;
4880 } else {
4881 Py_INCREF(orig_args);
4882 args = orig_args;
4883 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004884 args_owned = 1;
4885 /* Take what we have of the result and let the Unicode formatting
4886 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004887 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004888 if (_PyString_Resize(&result, rescnt))
4889 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004890 fmtcnt = PyString_GET_SIZE(format) - \
4891 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004892 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4893 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004894 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004895 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004896 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004897 if (v == NULL)
4898 goto error;
4899 /* Paste what we have (result) to what the Unicode formatting
4900 function returned (v) and return the result (or error) */
4901 w = PyUnicode_Concat(result, v);
4902 Py_DECREF(result);
4903 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004904 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004905 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004906#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004907
Guido van Rossume5372401993-03-16 12:15:04 +00004908 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004909 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004910 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004911 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004912 }
Guido van Rossume5372401993-03-16 12:15:04 +00004913 return NULL;
4914}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004915
Guido van Rossum2a61e741997-01-18 07:55:05 +00004916void
Fred Drakeba096332000-07-09 07:04:36 +00004917PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004918{
4919 register PyStringObject *s = (PyStringObject *)(*p);
4920 PyObject *t;
4921 if (s == NULL || !PyString_Check(s))
4922 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004923 /* If it's a string subclass, we don't really know what putting
4924 it in the interned dict might do. */
4925 if (!PyString_CheckExact(s))
4926 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004927 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004928 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004929 if (interned == NULL) {
4930 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004931 if (interned == NULL) {
4932 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004933 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004934 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004935 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004936 t = PyDict_GetItem(interned, (PyObject *)s);
4937 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004938 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004939 Py_DECREF(*p);
4940 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004941 return;
4942 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004943
Armin Rigo79f7ad22004-08-07 19:27:39 +00004944 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004945 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004946 return;
4947 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004948 /* The two references in interned are not counted by refcnt.
4949 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004950 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004951 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004952}
4953
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004954void
4955PyString_InternImmortal(PyObject **p)
4956{
4957 PyString_InternInPlace(p);
4958 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4959 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4960 Py_INCREF(*p);
4961 }
4962}
4963
Guido van Rossum2a61e741997-01-18 07:55:05 +00004964
4965PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004966PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004967{
4968 PyObject *s = PyString_FromString(cp);
4969 if (s == NULL)
4970 return NULL;
4971 PyString_InternInPlace(&s);
4972 return s;
4973}
4974
Guido van Rossum8cf04761997-08-02 02:57:45 +00004975void
Fred Drakeba096332000-07-09 07:04:36 +00004976PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004977{
4978 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004979 for (i = 0; i < UCHAR_MAX + 1; i++) {
4980 Py_XDECREF(characters[i]);
4981 characters[i] = NULL;
4982 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004983 Py_XDECREF(nullstring);
4984 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004985}
Barry Warsawa903ad982001-02-23 16:40:48 +00004986
Barry Warsawa903ad982001-02-23 16:40:48 +00004987void _Py_ReleaseInternedStrings(void)
4988{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004989 PyObject *keys;
4990 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004991 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004992
4993 if (interned == NULL || !PyDict_Check(interned))
4994 return;
4995 keys = PyDict_Keys(interned);
4996 if (keys == NULL || !PyList_Check(keys)) {
4997 PyErr_Clear();
4998 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004999 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005000
5001 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5002 detector, interned strings are not forcibly deallocated; rather, we
5003 give them their stolen references back, and then clear and DECREF
5004 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005005
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005006 fprintf(stderr, "releasing interned strings\n");
5007 n = PyList_GET_SIZE(keys);
5008 for (i = 0; i < n; i++) {
5009 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5010 switch (s->ob_sstate) {
5011 case SSTATE_NOT_INTERNED:
5012 /* XXX Shouldn't happen */
5013 break;
5014 case SSTATE_INTERNED_IMMORTAL:
5015 s->ob_refcnt += 1;
5016 break;
5017 case SSTATE_INTERNED_MORTAL:
5018 s->ob_refcnt += 2;
5019 break;
5020 default:
5021 Py_FatalError("Inconsistent interned string state.");
5022 }
5023 s->ob_sstate = SSTATE_NOT_INTERNED;
5024 }
5025 Py_DECREF(keys);
5026 PyDict_Clear(interned);
5027 Py_DECREF(interned);
5028 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005029}