blob: 3ba85e6438d074169c5d10715ebef623c433a80d [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000021 Another way to look at this is to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Thomas Wouters477c8d52006-05-27 19:21:47 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Thomas Wouters477c8d52006-05-27 19:21:47 +0000769/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770/* Methods */
771
Thomas Wouters477c8d52006-05-27 19:21:47 +0000772#define STRINGLIB_CHAR char
773
774#define STRINGLIB_CMP memcmp
775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
778
779#define STRINGLIB_EMPTY nullstring
780
781#include "stringlib/fastsearch.h"
782
783#include "stringlib/count.h"
784#include "stringlib/find.h"
785#include "stringlib/partition.h"
786
787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000807#ifdef __VMS
808 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
809#else
810 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
811#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000812 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814
Thomas Wouters7e474022000-07-16 12:04:32 +0000815 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000817 if (memchr(op->ob_sval, '\'', op->ob_size) &&
818 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '"';
820
821 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822 for (i = 0; i < op->ob_size; i++) {
823 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000825 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000826 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000827 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000828 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000829 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000830 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000831 fprintf(fp, "\\r");
832 else if (c < ' ' || c >= 0x7f)
833 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000834 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000835 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000838 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000839}
840
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841PyObject *
842PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000844 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000845 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000846 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000847 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000848 PyErr_SetString(PyExc_OverflowError,
849 "string is too large to make repr");
850 }
851 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000853 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
855 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000856 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 register char c;
858 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000859 int quote;
860
Thomas Wouters7e474022000-07-16 12:04:32 +0000861 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000862 quote = '\'';
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000863 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000865 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000866 quote = '"';
867
Tim Peters9161c8b2001-12-03 01:55:38 +0000868 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000869 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000871 /* There's at least enough room for a hex escape
872 and a closing quote. */
873 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000875 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000877 else if (c == '\t')
878 *p++ = '\\', *p++ = 't';
879 else if (c == '\n')
880 *p++ = '\\', *p++ = 'n';
881 else if (c == '\r')
882 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000883 else if (c < ' ' || c >= 0x7f) {
884 /* For performance, we don't want to call
885 PyOS_snprintf here (extra layers of
886 function call). */
887 sprintf(p, "\\x%02x", c & 0xff);
888 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000889 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000890 else
891 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000893 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000894 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000896 _PyString_Resize(
Thomas Woutersd4ec0c32006-04-21 16:44:05 +0000897 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000898 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900}
901
Guido van Rossum189f1df2001-05-01 16:51:53 +0000902static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000903string_repr(PyObject *op)
904{
905 return PyString_Repr(op, 1);
906}
907
908static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000909string_str(PyObject *s)
910{
Tim Petersc9933152001-10-16 20:18:24 +0000911 assert(PyString_Check(s));
912 if (PyString_CheckExact(s)) {
913 Py_INCREF(s);
914 return s;
915 }
916 else {
917 /* Subtype -- return genuine string with the same value. */
918 PyStringObject *t = (PyStringObject *) s;
919 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
920 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921}
922
Martin v. Löwis18e16552006-02-15 17:27:45 +0000923static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000924string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925{
926 return a->ob_size;
927}
928
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000930string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000931{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000932 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000933 register PyStringObject *op;
934 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000935#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000936 if (PyUnicode_Check(bb))
937 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000938#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000939 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000940 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000941 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942 return NULL;
943 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000946 if ((a->ob_size == 0 || b->ob_size == 0) &&
947 PyString_CheckExact(a) && PyString_CheckExact(b)) {
948 if (a->ob_size == 0) {
949 Py_INCREF(bb);
950 return bb;
951 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
955 size = a->ob_size + b->ob_size;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000956 if (size < 0) {
957 PyErr_SetString(PyExc_OverflowError,
958 "strings are too large to concat");
959 return NULL;
960 }
961
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000962 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000963 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000969 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000971 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973#undef b
974}
975
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000977string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000979 register Py_ssize_t i;
980 register Py_ssize_t j;
981 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000983 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984 if (n < 0)
985 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000986 /* watch out for overflows: the size can overflow int,
987 * and the # of bytes needed can overflow size_t
988 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000990 if (n && size / n != a->ob_size) {
991 PyErr_SetString(PyExc_OverflowError,
992 "repeated string is too long");
993 return NULL;
994 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 Py_INCREF(a);
997 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 }
Tim Peterse7c05322004-06-27 17:24:49 +0000999 nbytes = (size_t)size;
1000 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001001 PyErr_SetString(PyExc_OverflowError,
1002 "repeated string is too long");
1003 return NULL;
1004 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001006 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001007 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001009 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001010 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001011 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001012 op->ob_sval[size] = '\0';
1013 if (a->ob_size == 1 && n > 0) {
1014 memset(op->ob_sval, a->ob_sval[0] , n);
1015 return (PyObject *) op;
1016 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001017 i = 0;
1018 if (i < size) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001019 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001020 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001021 }
1022 while (i < size) {
1023 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001024 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001025 i += j;
1026 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028}
1029
1030/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1031
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001032static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001033string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001035 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036{
1037 if (i < 0)
1038 i = 0;
1039 if (j < 0)
1040 j = 0; /* Avoid signed/unsigned bug in next line */
1041 if (j > a->ob_size)
1042 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001043 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001045 Py_INCREF(a);
1046 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047 }
1048 if (j < i)
1049 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001050 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
Guido van Rossum9284a572000-03-07 15:53:43 +00001053static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001054string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001055{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001056 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001057#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00001058 if (PyUnicode_Check(sub_obj))
1059 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001061 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001062 PyErr_SetString(PyExc_TypeError,
1063 "'in <string>' requires string as left operand");
1064 return -1;
1065 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001066 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001067
Thomas Wouters477c8d52006-05-27 19:21:47 +00001068 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001069}
1070
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001072string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001074 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001075 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001076 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001077 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 return NULL;
1079 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001080 pchar = a->ob_sval[i];
1081 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001082 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001083 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001084 else {
1085#ifdef COUNT_ALLOCS
1086 one_strings++;
1087#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001088 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001089 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001090 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091}
1092
Martin v. Löwiscd353062001-05-24 16:56:35 +00001093static PyObject*
1094string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001095{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001096 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001097 Py_ssize_t len_a, len_b;
1098 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001099 PyObject *result;
1100
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001101 /* Make sure both arguments are strings. */
1102 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001103 result = Py_NotImplemented;
1104 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001105 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106 if (a == b) {
1107 switch (op) {
1108 case Py_EQ:case Py_LE:case Py_GE:
1109 result = Py_True;
1110 goto out;
1111 case Py_NE:case Py_LT:case Py_GT:
1112 result = Py_False;
1113 goto out;
1114 }
1115 }
1116 if (op == Py_EQ) {
1117 /* Supporting Py_NE here as well does not save
1118 much time, since Py_NE is rarely used. */
1119 if (a->ob_size == b->ob_size
1120 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001121 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001122 a->ob_size) == 0)) {
1123 result = Py_True;
1124 } else {
1125 result = Py_False;
1126 }
1127 goto out;
1128 }
1129 len_a = a->ob_size; len_b = b->ob_size;
1130 min_len = (len_a < len_b) ? len_a : len_b;
1131 if (min_len > 0) {
1132 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133 if (c==0)
1134 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135 }else
1136 c = 0;
1137 if (c == 0)
1138 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139 switch (op) {
1140 case Py_LT: c = c < 0; break;
1141 case Py_LE: c = c <= 0; break;
1142 case Py_EQ: assert(0); break; /* unreachable */
1143 case Py_NE: c = c != 0; break;
1144 case Py_GT: c = c > 0; break;
1145 case Py_GE: c = c >= 0; break;
1146 default:
1147 result = Py_NotImplemented;
1148 goto out;
1149 }
1150 result = c ? Py_True : Py_False;
1151 out:
1152 Py_INCREF(result);
1153 return result;
1154}
1155
1156int
1157_PyString_Eq(PyObject *o1, PyObject *o2)
1158{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001159 PyStringObject *a = (PyStringObject*) o1;
1160 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001161 return a->ob_size == b->ob_size
1162 && *a->ob_sval == *b->ob_sval
1163 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001164}
1165
Guido van Rossum9bfef441993-03-29 10:43:31 +00001166static long
Fred Drakeba096332000-07-09 07:04:36 +00001167string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001168{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001169 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001170 register unsigned char *p;
1171 register long x;
1172
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 if (a->ob_shash != -1)
1174 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001175 len = a->ob_size;
1176 p = (unsigned char *) a->ob_sval;
1177 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001179 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180 x ^= a->ob_size;
1181 if (x == -1)
1182 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001183 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001184 return x;
1185}
1186
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001187static PyObject*
1188string_subscript(PyStringObject* self, PyObject* item)
1189{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001190 PyNumberMethods *nb = item->ob_type->tp_as_number;
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00001191 if (nb != NULL && nb->nb_index != NULL) {
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001192 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001193 if (i == -1 && PyErr_Occurred())
1194 return NULL;
1195 if (i < 0)
1196 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001197 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001198 }
1199 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001200 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201 char* source_buf;
1202 char* result_buf;
1203 PyObject* result;
1204
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001205 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 PyString_GET_SIZE(self),
1207 &start, &stop, &step, &slicelength) < 0) {
1208 return NULL;
1209 }
1210
1211 if (slicelength <= 0) {
1212 return PyString_FromStringAndSize("", 0);
1213 }
1214 else {
1215 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001216 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001217 if (result_buf == NULL)
1218 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001220 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001221 cur += step, i++) {
1222 result_buf[i] = source_buf[cur];
1223 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001224
1225 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001226 slicelength);
1227 PyMem_Free(result_buf);
1228 return result;
1229 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001230 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001231 else {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001232 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001233 "string indices must be integers");
1234 return NULL;
1235 }
1236}
1237
Martin v. Löwis18e16552006-02-15 17:27:45 +00001238static Py_ssize_t
1239string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001240{
1241 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001242 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001243 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001244 return -1;
1245 }
1246 *ptr = (void *)self->ob_sval;
1247 return self->ob_size;
1248}
1249
Martin v. Löwis18e16552006-02-15 17:27:45 +00001250static Py_ssize_t
1251string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252{
Guido van Rossum045e6881997-09-08 18:30:11 +00001253 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001254 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001255 return -1;
1256}
1257
Martin v. Löwis18e16552006-02-15 17:27:45 +00001258static Py_ssize_t
1259string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001260{
1261 if ( lenp )
1262 *lenp = self->ob_size;
1263 return 1;
1264}
1265
Martin v. Löwis18e16552006-02-15 17:27:45 +00001266static Py_ssize_t
1267string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001268{
1269 if ( index != 0 ) {
1270 PyErr_SetString(PyExc_SystemError,
1271 "accessing non-existent string segment");
1272 return -1;
1273 }
1274 *ptr = self->ob_sval;
1275 return self->ob_size;
1276}
1277
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001278static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001279 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001280 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281 (ssizeargfunc)string_repeat, /*sq_repeat*/
1282 (ssizeargfunc)string_item, /*sq_item*/
1283 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001284 0, /*sq_ass_item*/
1285 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001286 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001287};
1288
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001289static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001290 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001291 (binaryfunc)string_subscript,
1292 0,
1293};
1294
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001295static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001296 (readbufferproc)string_buffer_getreadbuf,
1297 (writebufferproc)string_buffer_getwritebuf,
1298 (segcountproc)string_buffer_getsegcount,
1299 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001300};
1301
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302
1303
/* Selectors for the strip variants; they index stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name: the matching stripformat entry with its first three
   characters ("|O:") skipped.  The argument is parenthesized so that any
   expression can be passed as the selector. */
#define STRIPNAME(i) (stripformat[(i)]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001312
Thomas Wouters477c8d52006-05-27 19:21:47 +00001313
/* Non-zero when `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly before memcmp() is used on the bytes after the first.
   Don't call if length < 2 (the memcmp count is length-2).
   Every argument is parenthesized so expressions can be passed safely;
   arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1319
1320
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.  The result is capped at MAX_PREALLOC.  The
   argument is parenthesized so that expressions (including ?:) can be
   passed; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1333
/* Create a string from data[left:right] and append it to `list`.
   Expands to a statement sequence (not a single statement) that uses the
   names `str`, `list` and the label `onError` from the expansion site; on
   any failure it jumps to onError with no reference to `str` left over. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
1345
/* Create a string from data[left:right] and add it to `list` as item
   number `count`, then increment `count`.  While count < MAX_PREALLOC the
   item is stored with PyList_SET_ITEM; after that it is appended with
   PyList_Append.  Uses the names `str`, `list`, `count` and the label
   `onError` from the expansion site. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str) != 0) {                    \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362
/* Always force the list to the expected size: overwrite the list's size
   field with `count` (taken from the expansion site).  The argument is
   parenthesized so the cast applies to the whole expression. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001365
/* Index-advancing helpers.  `i` must be a modifiable lvalue; `s` and `len`
   are parenthesized so expressions can be passed.
   SKIP_SPACE / SKIP_NONSPACE move i forward (bounded by len) past
   whitespace / non-whitespace; RSKIP_* move i backward, stopping once i
   drops below 0. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1370
1371Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001372split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001373{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001374 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001375 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001376 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001377
1378 if (list == NULL)
1379 return NULL;
1380
Thomas Wouters477c8d52006-05-27 19:21:47 +00001381 i = j = 0;
1382
1383 while (maxsplit-- > 0) {
1384 SKIP_SPACE(s, i, len);
1385 if (i==len) break;
1386 j = i; i++;
1387 SKIP_NONSPACE(s, i, len);
1388 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001390
1391 if (i < len) {
1392 /* Only occurs when maxsplit was reached */
1393 /* Skip any remaining whitespace and copy to end of string */
1394 SKIP_SPACE(s, i, len);
1395 if (i != len)
1396 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001397 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001398 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001400 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 Py_DECREF(list);
1402 return NULL;
1403}
1404
Thomas Wouters477c8d52006-05-27 19:21:47 +00001405Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001406split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001407{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001408 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001410 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411
1412 if (list == NULL)
1413 return NULL;
1414
Thomas Wouters477c8d52006-05-27 19:21:47 +00001415 i = j = 0;
1416 while ((j < len) && (maxcount-- > 0)) {
1417 for(; j<len; j++) {
1418 /* I found that using memchr makes no difference */
1419 if (s[j] == ch) {
1420 SPLIT_ADD(s, i, j);
1421 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001422 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001423 }
1424 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001426 if (i <= len) {
1427 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001428 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001429 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430 return list;
1431
1432 onError:
1433 Py_DECREF(list);
1434 return NULL;
1435}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001437PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438"S.split([sep [,maxsplit]]) -> list of strings\n\
1439\n\
1440Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001441delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001442splits are done. If sep is not specified or is None, any\n\
1443whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444
1445static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001446string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001448 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001449 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001450 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001451 PyObject *list, *str, *subobj = Py_None;
1452#ifdef USE_FAST
1453 Py_ssize_t pos;
1454#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001458 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001460 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001462 if (PyString_Check(subobj)) {
1463 sub = PyString_AS_STRING(subobj);
1464 n = PyString_GET_SIZE(subobj);
1465 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001466#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001467 else if (PyUnicode_Check(subobj))
1468 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001469#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001470 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1471 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001472
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473 if (n == 0) {
1474 PyErr_SetString(PyExc_ValueError, "empty separator");
1475 return NULL;
1476 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 else if (n == 1)
1478 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479
Thomas Wouters477c8d52006-05-27 19:21:47 +00001480 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 if (list == NULL)
1482 return NULL;
1483
Thomas Wouters477c8d52006-05-27 19:21:47 +00001484#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001486 while (maxsplit-- > 0) {
1487 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1488 if (pos < 0)
1489 break;
1490 j = i+pos;
1491 SPLIT_ADD(s, i, j);
1492 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001494#else
1495 i = j = 0;
1496 while ((j+n <= len) && (maxsplit-- > 0)) {
1497 for (; j+n <= len; j++) {
1498 if (Py_STRING_MATCH(s, j, sub, n)) {
1499 SPLIT_ADD(s, i, j);
1500 i = j = j + n;
1501 break;
1502 }
1503 }
1504 }
1505#endif
1506 SPLIT_ADD(s, i, len);
1507 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508 return list;
1509
Thomas Wouters477c8d52006-05-27 19:21:47 +00001510 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 Py_DECREF(list);
1512 return NULL;
1513}
1514
Thomas Wouters477c8d52006-05-27 19:21:47 +00001515PyDoc_STRVAR(partition__doc__,
1516"S.partition(sep) -> (head, sep, tail)\n\
1517\n\
1518Searches for the separator sep in S, and returns the part before it,\n\
1519the separator itself, and the part after it. If the separator is not\n\
1520found, returns S and two empty strings.");
1521
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001522static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001523string_partition(PyStringObject *self, PyObject *sep_obj)
1524{
1525 const char *sep;
1526 Py_ssize_t sep_len;
1527
1528 if (PyString_Check(sep_obj)) {
1529 sep = PyString_AS_STRING(sep_obj);
1530 sep_len = PyString_GET_SIZE(sep_obj);
1531 }
1532#ifdef Py_USING_UNICODE
1533 else if (PyUnicode_Check(sep_obj))
1534 return PyUnicode_Partition((PyObject *) self, sep_obj);
1535#endif
1536 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1537 return NULL;
1538
1539 return stringlib_partition(
1540 (PyObject*) self,
1541 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1542 sep_obj, sep, sep_len
1543 );
1544}
1545
1546PyDoc_STRVAR(rpartition__doc__,
1547"S.rpartition(sep) -> (head, sep, tail)\n\
1548\n\
1549Searches for the separator sep in S, starting at the end of S, and returns\n\
1550the part before it, the separator itself, and the part after it. If the\n\
1551separator is not found, returns S and two empty strings.");
1552
1553static PyObject *
1554string_rpartition(PyStringObject *self, PyObject *sep_obj)
1555{
1556 const char *sep;
1557 Py_ssize_t sep_len;
1558
1559 if (PyString_Check(sep_obj)) {
1560 sep = PyString_AS_STRING(sep_obj);
1561 sep_len = PyString_GET_SIZE(sep_obj);
1562 }
1563#ifdef Py_USING_UNICODE
1564 else if (PyUnicode_Check(sep_obj))
1565 return PyUnicode_Partition((PyObject *) self, sep_obj);
1566#endif
1567 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1568 return NULL;
1569
1570 return stringlib_rpartition(
1571 (PyObject*) self,
1572 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1573 sep_obj, sep, sep_len
1574 );
1575}
1576
1577Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001578rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001579{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001580 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001581 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001582 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001583
1584 if (list == NULL)
1585 return NULL;
1586
Thomas Wouters477c8d52006-05-27 19:21:47 +00001587 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001588
Thomas Wouters477c8d52006-05-27 19:21:47 +00001589 while (maxsplit-- > 0) {
1590 RSKIP_SPACE(s, i);
1591 if (i<0) break;
1592 j = i; i--;
1593 RSKIP_NONSPACE(s, i);
1594 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001595 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001596 if (i >= 0) {
1597 /* Only occurs when maxsplit was reached */
1598 /* Skip any remaining whitespace and copy to beginning of string */
1599 RSKIP_SPACE(s, i);
1600 if (i >= 0)
1601 SPLIT_ADD(s, 0, i + 1);
1602
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001603 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001604 FIX_PREALLOC_SIZE(list);
1605 if (PyList_Reverse(list) < 0)
1606 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001607 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001608 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001609 Py_DECREF(list);
1610 return NULL;
1611}
1612
Thomas Wouters477c8d52006-05-27 19:21:47 +00001613Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001614rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001615{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001616 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001617 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001618 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001619
1620 if (list == NULL)
1621 return NULL;
1622
Thomas Wouters477c8d52006-05-27 19:21:47 +00001623 i = j = len - 1;
1624 while ((i >= 0) && (maxcount-- > 0)) {
1625 for (; i >= 0; i--) {
1626 if (s[i] == ch) {
1627 SPLIT_ADD(s, i + 1, j + 1);
1628 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001630 }
1631 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001632 }
1633 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001634 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001635 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001636 FIX_PREALLOC_SIZE(list);
1637 if (PyList_Reverse(list) < 0)
1638 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001639 return list;
1640
1641 onError:
1642 Py_DECREF(list);
1643 return NULL;
1644}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001645
1646PyDoc_STRVAR(rsplit__doc__,
1647"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1648\n\
1649Return a list of the words in the string S, using sep as the\n\
1650delimiter string, starting at the end of the string and working\n\
1651to the front. If maxsplit is given, at most maxsplit splits are\n\
1652done. If sep is not specified or is None, any whitespace string\n\
1653is a separator.");
1654
1655static PyObject *
1656string_rsplit(PyStringObject *self, PyObject *args)
1657{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001658 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001659 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001660 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001661 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001662
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001663 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001664 return NULL;
1665 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001666 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001667 if (subobj == Py_None)
1668 return rsplit_whitespace(s, len, maxsplit);
1669 if (PyString_Check(subobj)) {
1670 sub = PyString_AS_STRING(subobj);
1671 n = PyString_GET_SIZE(subobj);
1672 }
1673#ifdef Py_USING_UNICODE
1674 else if (PyUnicode_Check(subobj))
1675 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1676#endif
1677 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1678 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001679
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001680 if (n == 0) {
1681 PyErr_SetString(PyExc_ValueError, "empty separator");
1682 return NULL;
1683 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001684 else if (n == 1)
1685 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001686
Thomas Wouters477c8d52006-05-27 19:21:47 +00001687 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001688 if (list == NULL)
1689 return NULL;
1690
1691 j = len;
1692 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693
Thomas Wouters477c8d52006-05-27 19:21:47 +00001694 while ( (i >= 0) && (maxsplit-- > 0) ) {
1695 for (; i>=0; i--) {
1696 if (Py_STRING_MATCH(s, i, sub, n)) {
1697 SPLIT_ADD(s, i + n, j);
1698 j = i;
1699 i -= n;
1700 break;
1701 }
1702 }
1703 }
1704 SPLIT_ADD(s, 0, j);
1705 FIX_PREALLOC_SIZE(list);
1706 if (PyList_Reverse(list) < 0)
1707 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001708 return list;
1709
Thomas Wouters477c8d52006-05-27 19:21:47 +00001710onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001711 Py_DECREF(list);
1712 return NULL;
1713}
1714
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001716PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717"S.join(sequence) -> string\n\
1718\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001719Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001720sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721
1722static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001723string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724{
1725 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001726 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001729 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001730 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001731 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001732 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733
Tim Peters19fe14e2001-01-19 03:03:47 +00001734 seq = PySequence_Fast(orig, "");
1735 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001736 return NULL;
1737 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001738
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001739 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001740 if (seqlen == 0) {
1741 Py_DECREF(seq);
1742 return PyString_FromString("");
1743 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001745 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001746 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1747 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001749 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001750 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001752
Raymond Hettinger674f2412004-08-23 23:23:54 +00001753 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001754 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001755 * Do a pre-pass to figure out the total amount of space we'll
1756 * need (sz), see whether any argument is absurd, and defer to
1757 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001758 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001759 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001760 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001761 item = PySequence_Fast_GET_ITEM(seq, i);
1762 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001763#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001764 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001765 /* Defer to Unicode join.
1766 * CAUTION: There's no gurantee that the
1767 * original sequence can be iterated over
1768 * again, so we must pass seq here.
1769 */
1770 PyObject *result;
1771 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001772 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001773 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001774 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001775#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001777 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001778 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001779 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001780 Py_DECREF(seq);
1781 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001782 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001783 sz += PyString_GET_SIZE(item);
1784 if (i != 0)
1785 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001786 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001787 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001788 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001789 Py_DECREF(seq);
1790 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 }
1793
1794 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001795 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001796 if (res == NULL) {
1797 Py_DECREF(seq);
1798 return NULL;
1799 }
1800
1801 /* Catenate everything. */
1802 p = PyString_AS_STRING(res);
1803 for (i = 0; i < seqlen; ++i) {
1804 size_t n;
1805 item = PySequence_Fast_GET_ITEM(seq, i);
1806 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001807 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001808 p += n;
1809 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001810 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001811 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001812 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001814
Jeremy Hylton49048292000-07-11 03:28:17 +00001815 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817}
1818
Tim Peters52e155e2001-06-16 05:42:57 +00001819PyObject *
1820_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001821{
Tim Petersa7259592001-06-16 05:11:17 +00001822 assert(sep != NULL && PyString_Check(sep));
1823 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001824 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001825}
1826
Thomas Wouters477c8d52006-05-27 19:21:47 +00001827Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001828string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001829{
1830 if (*end > len)
1831 *end = len;
1832 else if (*end < 0)
1833 *end += len;
1834 if (*end < 0)
1835 *end = 0;
1836 if (*start < 0)
1837 *start += len;
1838 if (*start < 0)
1839 *start = 0;
1840}
1841
Thomas Wouters477c8d52006-05-27 19:21:47 +00001842Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001843string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001845 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001846 const char *sub;
1847 Py_ssize_t sub_len;
1848 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849
Thomas Wouters477c8d52006-05-27 19:21:47 +00001850 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1851 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001852 return -2;
1853 if (PyString_Check(subobj)) {
1854 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001855 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001856 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001857#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001859 return PyUnicode_Find(
1860 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001861#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001862 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001863 /* XXX - the "expected a character buffer object" is pretty
1864 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865 return -2;
1866
Thomas Wouters477c8d52006-05-27 19:21:47 +00001867 if (dir > 0)
1868 return stringlib_find_slice(
1869 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1870 sub, sub_len, start, end);
1871 else
1872 return stringlib_rfind_slice(
1873 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1874 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875}
1876
1877
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001878PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879"S.find(sub [,start [,end]]) -> int\n\
1880\n\
1881Return the lowest index in S where substring sub is found,\n\
1882such that sub is contained within s[start,end]. Optional\n\
1883arguments start and end are interpreted as in slice notation.\n\
1884\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001885Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886
1887static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001888string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001890 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891 if (result == -2)
1892 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001893 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894}
1895
1896
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001897PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898"S.index(sub [,start [,end]]) -> int\n\
1899\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001900Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901
1902static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001903string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906 if (result == -2)
1907 return NULL;
1908 if (result == -1) {
1909 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001910 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 return NULL;
1912 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001913 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914}
1915
1916
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001917PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918"S.rfind(sub [,start [,end]]) -> int\n\
1919\n\
1920Return the highest index in S where substring sub is found,\n\
1921such that sub is contained within s[start,end]. Optional\n\
1922arguments start and end are interpreted as in slice notation.\n\
1923\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001924Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925
1926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001927string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001929 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930 if (result == -2)
1931 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001932 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933}
1934
1935
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001936PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937"S.rindex(sub [,start [,end]]) -> int\n\
1938\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940
1941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001942string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001944 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945 if (result == -2)
1946 return NULL;
1947 if (result == -1) {
1948 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001949 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950 return NULL;
1951 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001952 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953}
1954
1955
Thomas Wouters477c8d52006-05-27 19:21:47 +00001956Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001957do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1958{
1959 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001960 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001961 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001962 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1963 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964
1965 i = 0;
1966 if (striptype != RIGHTSTRIP) {
1967 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1968 i++;
1969 }
1970 }
1971
1972 j = len;
1973 if (striptype != LEFTSTRIP) {
1974 do {
1975 j--;
1976 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1977 j++;
1978 }
1979
1980 if (i == 0 && j == len && PyString_CheckExact(self)) {
1981 Py_INCREF(self);
1982 return (PyObject*)self;
1983 }
1984 else
1985 return PyString_FromStringAndSize(s+i, j-i);
1986}
1987
1988
Thomas Wouters477c8d52006-05-27 19:21:47 +00001989Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001990do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991{
1992 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001993 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995 i = 0;
1996 if (striptype != RIGHTSTRIP) {
1997 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1998 i++;
1999 }
2000 }
2001
2002 j = len;
2003 if (striptype != LEFTSTRIP) {
2004 do {
2005 j--;
2006 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2007 j++;
2008 }
2009
Tim Peters8fa5dd02001-09-12 02:18:30 +00002010 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011 Py_INCREF(self);
2012 return (PyObject*)self;
2013 }
2014 else
2015 return PyString_FromStringAndSize(s+i, j-i);
2016}
2017
2018
Thomas Wouters477c8d52006-05-27 19:21:47 +00002019Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002020do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2021{
2022 PyObject *sep = NULL;
2023
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002024 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002025 return NULL;
2026
2027 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002028 if (PyString_Check(sep))
2029 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002030#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002031 else if (PyUnicode_Check(sep)) {
2032 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2033 PyObject *res;
2034 if (uniself==NULL)
2035 return NULL;
2036 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2037 striptype, sep);
2038 Py_DECREF(uniself);
2039 return res;
2040 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002041#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002042 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002043#ifdef Py_USING_UNICODE
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002044 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002045#else
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002046 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002047#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002048 STRIPNAME(striptype));
2049 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002050 }
2051
2052 return do_strip(self, striptype);
2053}
2054
2055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002057"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058\n\
2059Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002060whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002061If chars is given and not None, remove characters in chars instead.\n\
2062If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063
2064static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002065string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002067 if (PyTuple_GET_SIZE(args) == 0)
2068 return do_strip(self, BOTHSTRIP); /* Common case */
2069 else
2070 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071}
2072
2073
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002074PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002075"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002077Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002078If chars is given and not None, remove characters in chars instead.\n\
2079If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080
2081static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002082string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002084 if (PyTuple_GET_SIZE(args) == 0)
2085 return do_strip(self, LEFTSTRIP); /* Common case */
2086 else
2087 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088}
2089
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002092"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002094Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002095If chars is given and not None, remove characters in chars instead.\n\
2096If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097
2098static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002099string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002101 if (PyTuple_GET_SIZE(args) == 0)
2102 return do_strip(self, RIGHTSTRIP); /* Common case */
2103 else
2104 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105}
2106
2107
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002108PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109"S.lower() -> string\n\
2110\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002111Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112
/* SUSv2 defines _tolower and _toupper, but ISO C does not; fall back
   to the standard tolower when _tolower is not available as a macro. */
#if !defined(_tolower)
#define _tolower tolower
#endif
2117
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002119string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002121 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002122 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002123 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002125 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002126 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002128
2129 s = PyString_AS_STRING(newobj);
2130
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002131 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002132
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002134 int c = Py_CHARMASK(s[i]);
2135 if (isupper(c))
2136 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002138
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002139 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140}
2141
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002142PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143"S.upper() -> string\n\
2144\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146
Thomas Wouters477c8d52006-05-27 19:21:47 +00002147#ifndef _toupper
2148#define _toupper toupper
2149#endif
2150
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002152string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002154 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002155 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002156 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002158 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002159 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002161
2162 s = PyString_AS_STRING(newobj);
2163
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002164 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002165
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002167 int c = Py_CHARMASK(s[i]);
2168 if (islower(c))
2169 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002171
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002172 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173}
2174
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002175PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176"S.title() -> string\n\
2177\n\
2178Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002179characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002180
2181static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002182string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183{
2184 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002185 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002187 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002189 newobj = PyString_FromStringAndSize(NULL, n);
2190 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002192 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 for (i = 0; i < n; i++) {
2194 int c = Py_CHARMASK(*s++);
2195 if (islower(c)) {
2196 if (!previous_is_cased)
2197 c = toupper(c);
2198 previous_is_cased = 1;
2199 } else if (isupper(c)) {
2200 if (previous_is_cased)
2201 c = tolower(c);
2202 previous_is_cased = 1;
2203 } else
2204 previous_is_cased = 0;
2205 *s_new++ = c;
2206 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002207 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002208}
2209
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002210PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211"S.capitalize() -> string\n\
2212\n\
2213Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002214capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215
2216static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002217string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218{
2219 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002220 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002221 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002223 newobj = PyString_FromStringAndSize(NULL, n);
2224 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002226 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 if (0 < n) {
2228 int c = Py_CHARMASK(*s++);
2229 if (islower(c))
2230 *s_new = toupper(c);
2231 else
2232 *s_new = c;
2233 s_new++;
2234 }
2235 for (i = 1; i < n; i++) {
2236 int c = Py_CHARMASK(*s++);
2237 if (isupper(c))
2238 *s_new = tolower(c);
2239 else
2240 *s_new = c;
2241 s_new++;
2242 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002243 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244}
2245
2246
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002247PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248"S.count(sub[, start[, end]]) -> int\n\
2249\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002250Return the number of non-overlapping occurrences of substring sub in\n\
2251string S[start:end]. Optional arguments start and end are interpreted\n\
2252as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253
2254static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002255string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002257 PyObject *sub_obj;
2258 const char *str = PyString_AS_STRING(self), *sub;
2259 Py_ssize_t sub_len;
2260 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261
Thomas Wouters477c8d52006-05-27 19:21:47 +00002262 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2263 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002265
Thomas Wouters477c8d52006-05-27 19:21:47 +00002266 if (PyString_Check(sub_obj)) {
2267 sub = PyString_AS_STRING(sub_obj);
2268 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002270#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00002271 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002272 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002273 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002274 if (count == -1)
2275 return NULL;
2276 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002277 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002278 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002279#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00002280 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 return NULL;
2282
Thomas Wouters477c8d52006-05-27 19:21:47 +00002283 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002284
Thomas Wouters477c8d52006-05-27 19:21:47 +00002285 return PyInt_FromSsize_t(
2286 stringlib_count(str + start, end - start, sub, sub_len)
2287 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288}
2289
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002290PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291"S.swapcase() -> string\n\
2292\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002294converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295
2296static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002297string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298{
2299 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002300 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002301 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002303 newobj = PyString_FromStringAndSize(NULL, n);
2304 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002306 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307 for (i = 0; i < n; i++) {
2308 int c = Py_CHARMASK(*s++);
2309 if (islower(c)) {
2310 *s_new = toupper(c);
2311 }
2312 else if (isupper(c)) {
2313 *s_new = tolower(c);
2314 }
2315 else
2316 *s_new = c;
2317 s_new++;
2318 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002319 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320}
2321
2322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324"S.translate(table [,deletechars]) -> string\n\
2325\n\
2326Return a copy of the string S, where all characters occurring\n\
2327in the optional argument deletechars are removed, and the\n\
2328remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002329translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330
2331static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002332string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002334 register char *input, *output;
2335 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002336 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002338 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002339 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 PyObject *result;
2341 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002342 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002344 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002347
2348 if (PyString_Check(tableobj)) {
2349 table1 = PyString_AS_STRING(tableobj);
2350 tablen = PyString_GET_SIZE(tableobj);
2351 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002352#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002354 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355 parameter; instead a mapping to None will cause characters
2356 to be deleted. */
2357 if (delobj != NULL) {
2358 PyErr_SetString(PyExc_TypeError,
2359 "deletions are implemented differently for unicode");
2360 return NULL;
2361 }
2362 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2363 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002364#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367
Martin v. Löwis00b61272002-12-12 20:03:19 +00002368 if (tablen != 256) {
2369 PyErr_SetString(PyExc_ValueError,
2370 "translation table must be 256 characters long");
2371 return NULL;
2372 }
2373
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 if (delobj != NULL) {
2375 if (PyString_Check(delobj)) {
2376 del_table = PyString_AS_STRING(delobj);
2377 dellen = PyString_GET_SIZE(delobj);
2378 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002379#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380 else if (PyUnicode_Check(delobj)) {
2381 PyErr_SetString(PyExc_TypeError,
2382 "deletions are implemented differently for unicode");
2383 return NULL;
2384 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002385#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2387 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 }
2389 else {
2390 del_table = NULL;
2391 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392 }
2393
2394 table = table1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002395 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002396 result = PyString_FromStringAndSize((char *)NULL, inlen);
2397 if (result == NULL)
2398 return NULL;
2399 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002400 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401
2402 if (dellen == 0) {
2403 /* If no deletions are required, use faster code */
2404 for (i = inlen; --i >= 0; ) {
2405 c = Py_CHARMASK(*input++);
2406 if (Py_CHARMASK((*output++ = table[c])) != c)
2407 changed = 1;
2408 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002409 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 return result;
2411 Py_DECREF(result);
2412 Py_INCREF(input_obj);
2413 return input_obj;
2414 }
2415
2416 for (i = 0; i < 256; i++)
2417 trans_table[i] = Py_CHARMASK(table[i]);
2418
2419 for (i = 0; i < dellen; i++)
2420 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2421
2422 for (i = inlen; --i >= 0; ) {
2423 c = Py_CHARMASK(*input++);
2424 if (trans_table[c] != -1)
2425 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2426 continue;
2427 changed = 1;
2428 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002429 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 Py_DECREF(result);
2431 Py_INCREF(input_obj);
2432 return input_obj;
2433 }
2434 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002435 if (inlen > 0)
2436 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437 return result;
2438}
2439
2440
/* Search direction selectors for findstring()/countstring().  REVERSE is
   parenthesised so the negative constant stays a single operand wherever
   the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c in target[0:target_len].  Evaluates to a
   char * pointing at the match, or NULL when there is none (memchr
   semantics).  Every argument is parenthesised so that expression arguments
   such as `end-start` or `flag ? a : b` expand safely. */
#define findchar(target, target_len, c)				\
  ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002448
Thomas Wouters477c8d52006-05-27 19:21:47 +00002449/* String ops must return a string. */
2450/* If the object is subclass of string, create a copy */
2451Py_LOCAL(PyStringObject *)
2452return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002454 if (PyString_CheckExact(self)) {
2455 Py_INCREF(self);
2456 return self;
2457 }
2458 return (PyStringObject *)PyString_FromStringAndSize(
2459 PyString_AS_STRING(self),
2460 PyString_GET_SIZE(self));
2461}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462
Thomas Wouters477c8d52006-05-27 19:21:47 +00002463Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002464countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002465{
2466 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002467 const char *start=target;
2468 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469
Thomas Wouters477c8d52006-05-27 19:21:47 +00002470 while ( (start=findchar(start, end-start, c)) != NULL ) {
2471 count++;
2472 if (count >= maxcount)
2473 break;
2474 start += 1;
2475 }
2476 return count;
2477}
2478
2479Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002480findstring(const char *target, Py_ssize_t target_len,
2481 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002482 Py_ssize_t start,
2483 Py_ssize_t end,
2484 int direction)
2485{
2486 if (start < 0) {
2487 start += target_len;
2488 if (start < 0)
2489 start = 0;
2490 }
2491 if (end > target_len) {
2492 end = target_len;
2493 } else if (end < 0) {
2494 end += target_len;
2495 if (end < 0)
2496 end = 0;
2497 }
2498
2499 /* zero-length substrings always match at the first attempt */
2500 if (pattern_len == 0)
2501 return (direction > 0) ? start : end;
2502
2503 end -= pattern_len;
2504
2505 if (direction < 0) {
2506 for (; end >= start; end--)
2507 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2508 return end;
2509 } else {
2510 for (; start <= end; start++)
2511 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2512 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002513 }
2514 return -1;
2515}
2516
Thomas Wouters477c8d52006-05-27 19:21:47 +00002517Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002518countstring(const char *target, Py_ssize_t target_len,
2519 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002520 Py_ssize_t start,
2521 Py_ssize_t end,
2522 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002523{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002524 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002525
Thomas Wouters477c8d52006-05-27 19:21:47 +00002526 if (start < 0) {
2527 start += target_len;
2528 if (start < 0)
2529 start = 0;
2530 }
2531 if (end > target_len) {
2532 end = target_len;
2533 } else if (end < 0) {
2534 end += target_len;
2535 if (end < 0)
2536 end = 0;
2537 }
2538
2539 /* zero-length substrings match everywhere */
2540 if (pattern_len == 0 || maxcount == 0) {
2541 if (target_len+1 < maxcount)
2542 return target_len+1;
2543 return maxcount;
2544 }
2545
2546 end -= pattern_len;
2547 if (direction < 0) {
2548 for (; (end >= start); end--)
2549 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2550 count++;
2551 if (--maxcount <= 0) break;
2552 end -= pattern_len-1;
2553 }
2554 } else {
2555 for (; (start <= end); start++)
2556 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2557 count++;
2558 if (--maxcount <= 0)
2559 break;
2560 start += pattern_len-1;
2561 }
2562 }
2563 return count;
2564}
2565
2566
2567/* Algorithms for different cases of string replacement */
2568
2569/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2570Py_LOCAL(PyStringObject *)
2571replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002572 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002573 Py_ssize_t maxcount)
2574{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002575 char *self_s, *result_s;
2576 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002577 Py_ssize_t count, i, product;
2578 PyStringObject *result;
2579
2580 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002581
Thomas Wouters477c8d52006-05-27 19:21:47 +00002582 /* 1 at the end plus 1 after every character */
2583 count = self_len+1;
2584 if (maxcount < count)
2585 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002586
Thomas Wouters477c8d52006-05-27 19:21:47 +00002587 /* Check for overflow */
2588 /* result_len = count * to_len + self_len; */
2589 product = count * to_len;
2590 if (product / to_len != count) {
2591 PyErr_SetString(PyExc_OverflowError,
2592 "replace string is too long");
2593 return NULL;
2594 }
2595 result_len = product + self_len;
2596 if (result_len < 0) {
2597 PyErr_SetString(PyExc_OverflowError,
2598 "replace string is too long");
2599 return NULL;
2600 }
2601
2602 if (! (result = (PyStringObject *)
2603 PyString_FromStringAndSize(NULL, result_len)) )
2604 return NULL;
2605
2606 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002607 result_s = PyString_AS_STRING(result);
2608
2609 /* TODO: special case single character, which doesn't need memcpy */
2610
2611 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002612 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002613 result_s += to_len;
2614 count -= 1;
2615
2616 for (i=0; i<count; i++) {
2617 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002618 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002619 result_s += to_len;
2620 }
2621
2622 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002623 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002624
2625 return result;
2626}
2627
2628/* Special case for deleting a single character */
2629/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2630Py_LOCAL(PyStringObject *)
2631replace_delete_single_character(PyStringObject *self,
2632 char from_c, Py_ssize_t maxcount)
2633{
2634 char *self_s, *result_s;
2635 char *start, *next, *end;
2636 Py_ssize_t self_len, result_len;
2637 Py_ssize_t count;
2638 PyStringObject *result;
2639
2640 self_len = PyString_GET_SIZE(self);
2641 self_s = PyString_AS_STRING(self);
2642
2643 count = countchar(self_s, self_len, from_c, maxcount);
2644 if (count == 0) {
2645 return return_self(self);
2646 }
2647
2648 result_len = self_len - count; /* from_len == 1 */
2649 assert(result_len>=0);
2650
2651 if ( (result = (PyStringObject *)
2652 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2653 return NULL;
2654 result_s = PyString_AS_STRING(result);
2655
2656 start = self_s;
2657 end = self_s + self_len;
2658 while (count-- > 0) {
2659 next = findchar(start, end-start, from_c);
2660 if (next == NULL)
2661 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002662 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002663 result_s += (next-start);
2664 start = next+1;
2665 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002666 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002667
Thomas Wouters477c8d52006-05-27 19:21:47 +00002668 return result;
2669}
2670
2671/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2672
2673Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002674replace_delete_substring(PyStringObject *self,
2675 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002676 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002677 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002678 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002679 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002680 Py_ssize_t count, offset;
2681 PyStringObject *result;
2682
2683 self_len = PyString_GET_SIZE(self);
2684 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002685
2686 count = countstring(self_s, self_len,
2687 from_s, from_len,
2688 0, self_len, 1,
2689 maxcount);
2690
2691 if (count == 0) {
2692 /* no matches */
2693 return return_self(self);
2694 }
2695
2696 result_len = self_len - (count * from_len);
2697 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002698
Thomas Wouters477c8d52006-05-27 19:21:47 +00002699 if ( (result = (PyStringObject *)
2700 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2701 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002702
Thomas Wouters477c8d52006-05-27 19:21:47 +00002703 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002704
Thomas Wouters477c8d52006-05-27 19:21:47 +00002705 start = self_s;
2706 end = self_s + self_len;
2707 while (count-- > 0) {
2708 offset = findstring(start, end-start,
2709 from_s, from_len,
2710 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002711 if (offset == -1)
2712 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002713 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002714
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002715 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002716
Thomas Wouters477c8d52006-05-27 19:21:47 +00002717 result_s += (next-start);
2718 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002719 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002720 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002721 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002722}
2723
Thomas Wouters477c8d52006-05-27 19:21:47 +00002724/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2725Py_LOCAL(PyStringObject *)
2726replace_single_character_in_place(PyStringObject *self,
2727 char from_c, char to_c,
2728 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002729{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002730 char *self_s, *result_s, *start, *end, *next;
2731 Py_ssize_t self_len;
2732 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002733
Thomas Wouters477c8d52006-05-27 19:21:47 +00002734 /* The result string will be the same size */
2735 self_s = PyString_AS_STRING(self);
2736 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002737
Thomas Wouters477c8d52006-05-27 19:21:47 +00002738 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002739
Thomas Wouters477c8d52006-05-27 19:21:47 +00002740 if (next == NULL) {
2741 /* No matches; return the original string */
2742 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002743 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002744
Thomas Wouters477c8d52006-05-27 19:21:47 +00002745 /* Need to make a new string */
2746 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2747 if (result == NULL)
2748 return NULL;
2749 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002750 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002751
Thomas Wouters477c8d52006-05-27 19:21:47 +00002752 /* change everything in-place, starting with this one */
2753 start = result_s + (next-self_s);
2754 *start = to_c;
2755 start++;
2756 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002757
Thomas Wouters477c8d52006-05-27 19:21:47 +00002758 while (--maxcount > 0) {
2759 next = findchar(start, end-start, from_c);
2760 if (next == NULL)
2761 break;
2762 *next = to_c;
2763 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002764 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002765
Thomas Wouters477c8d52006-05-27 19:21:47 +00002766 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002767}
2768
Thomas Wouters477c8d52006-05-27 19:21:47 +00002769/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2770Py_LOCAL(PyStringObject *)
2771replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002772 const char *from_s, Py_ssize_t from_len,
2773 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002774 Py_ssize_t maxcount)
2775{
2776 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002777 char *self_s;
2778 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002779 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002780
Thomas Wouters477c8d52006-05-27 19:21:47 +00002781 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002782
Thomas Wouters477c8d52006-05-27 19:21:47 +00002783 self_s = PyString_AS_STRING(self);
2784 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002785
Thomas Wouters477c8d52006-05-27 19:21:47 +00002786 offset = findstring(self_s, self_len,
2787 from_s, from_len,
2788 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002789 if (offset == -1) {
2790 /* No matches; return the original string */
2791 return return_self(self);
2792 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002793
Thomas Wouters477c8d52006-05-27 19:21:47 +00002794 /* Need to make a new string */
2795 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2796 if (result == NULL)
2797 return NULL;
2798 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002799 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002800
Thomas Wouters477c8d52006-05-27 19:21:47 +00002801 /* change everything in-place, starting with this one */
2802 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002803 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002804 start += from_len;
2805 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002806
Thomas Wouters477c8d52006-05-27 19:21:47 +00002807 while ( --maxcount > 0) {
2808 offset = findstring(start, end-start,
2809 from_s, from_len,
2810 0, end-start, FORWARD);
2811 if (offset==-1)
2812 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002813 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002814 start += offset+from_len;
2815 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002816
Thomas Wouters477c8d52006-05-27 19:21:47 +00002817 return result;
2818}
2819
2820/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2821Py_LOCAL(PyStringObject *)
2822replace_single_character(PyStringObject *self,
2823 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002824 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002825 Py_ssize_t maxcount)
2826{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002827 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002828 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002829 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002830 Py_ssize_t count, product;
2831 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002832
Thomas Wouters477c8d52006-05-27 19:21:47 +00002833 self_s = PyString_AS_STRING(self);
2834 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002835
Thomas Wouters477c8d52006-05-27 19:21:47 +00002836 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002837 if (count == 0) {
2838 /* no matches, return unchanged */
2839 return return_self(self);
2840 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002841
Thomas Wouters477c8d52006-05-27 19:21:47 +00002842 /* use the difference between current and new, hence the "-1" */
2843 /* result_len = self_len + count * (to_len-1) */
2844 product = count * (to_len-1);
2845 if (product / (to_len-1) != count) {
2846 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2847 return NULL;
2848 }
2849 result_len = self_len + product;
2850 if (result_len < 0) {
2851 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2852 return NULL;
2853 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002854
Thomas Wouters477c8d52006-05-27 19:21:47 +00002855 if ( (result = (PyStringObject *)
2856 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2857 return NULL;
2858 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002859
Thomas Wouters477c8d52006-05-27 19:21:47 +00002860 start = self_s;
2861 end = self_s + self_len;
2862 while (count-- > 0) {
2863 next = findchar(start, end-start, from_c);
2864 if (next == NULL)
2865 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002866
Thomas Wouters477c8d52006-05-27 19:21:47 +00002867 if (next == start) {
2868 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002869 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002870 result_s += to_len;
2871 start += 1;
2872 } else {
2873 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002874 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002875 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002876 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002877 result_s += to_len;
2878 start = next+1;
2879 }
2880 }
2881 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002882 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002883
Thomas Wouters477c8d52006-05-27 19:21:47 +00002884 return result;
2885}
2886
2887/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2888Py_LOCAL(PyStringObject *)
2889replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002890 const char *from_s, Py_ssize_t from_len,
2891 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002892 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002893 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002894 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002895 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002896 Py_ssize_t count, offset, product;
2897 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002898
Thomas Wouters477c8d52006-05-27 19:21:47 +00002899 self_s = PyString_AS_STRING(self);
2900 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002901
Thomas Wouters477c8d52006-05-27 19:21:47 +00002902 count = countstring(self_s, self_len,
2903 from_s, from_len,
2904 0, self_len, FORWARD, maxcount);
2905 if (count == 0) {
2906 /* no matches, return unchanged */
2907 return return_self(self);
2908 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002909
Thomas Wouters477c8d52006-05-27 19:21:47 +00002910 /* Check for overflow */
2911 /* result_len = self_len + count * (to_len-from_len) */
2912 product = count * (to_len-from_len);
2913 if (product / (to_len-from_len) != count) {
2914 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2915 return NULL;
2916 }
2917 result_len = self_len + product;
2918 if (result_len < 0) {
2919 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2920 return NULL;
2921 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002922
Thomas Wouters477c8d52006-05-27 19:21:47 +00002923 if ( (result = (PyStringObject *)
2924 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2925 return NULL;
2926 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002927
Thomas Wouters477c8d52006-05-27 19:21:47 +00002928 start = self_s;
2929 end = self_s + self_len;
2930 while (count-- > 0) {
2931 offset = findstring(start, end-start,
2932 from_s, from_len,
2933 0, end-start, FORWARD);
2934 if (offset == -1)
2935 break;
2936 next = start+offset;
2937 if (next == start) {
2938 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002939 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002940 result_s += to_len;
2941 start += from_len;
2942 } else {
2943 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002944 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002945 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002946 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002947 result_s += to_len;
2948 start = next+from_len;
2949 }
2950 }
2951 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002952 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002953
Thomas Wouters477c8d52006-05-27 19:21:47 +00002954 return result;
2955}
2956
2957
2958Py_LOCAL(PyStringObject *)
2959replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002960 const char *from_s, Py_ssize_t from_len,
2961 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002962 Py_ssize_t maxcount)
2963{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002964 if (maxcount < 0) {
2965 maxcount = PY_SSIZE_T_MAX;
2966 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2967 /* nothing to do; return the original string */
2968 return return_self(self);
2969 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002970
Thomas Wouters477c8d52006-05-27 19:21:47 +00002971 if (maxcount == 0 ||
2972 (from_len == 0 && to_len == 0)) {
2973 /* nothing to do; return the original string */
2974 return return_self(self);
2975 }
2976
2977 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002978
Thomas Wouters477c8d52006-05-27 19:21:47 +00002979 if (from_len == 0) {
2980 /* insert the 'to' string everywhere. */
2981 /* >>> "Python".replace("", ".") */
2982 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002983 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002984 }
2985
2986 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2987 /* point for an empty self string to generate a non-empty string */
2988 /* Special case so the remaining code always gets a non-empty string */
2989 if (PyString_GET_SIZE(self) == 0) {
2990 return return_self(self);
2991 }
2992
2993 if (to_len == 0) {
2994 /* delete all occurances of 'from' string */
2995 if (from_len == 1) {
2996 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002997 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002998 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002999 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003000 }
3001 }
3002
3003 /* Handle special case where both strings have the same length */
3004
3005 if (from_len == to_len) {
3006 if (from_len == 1) {
3007 return replace_single_character_in_place(
3008 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003009 from_s[0],
3010 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00003011 maxcount);
3012 } else {
3013 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003014 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003015 }
3016 }
3017
3018 /* Otherwise use the more generic algorithms */
3019 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003020 return replace_single_character(self, from_s[0],
3021 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003022 } else {
3023 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003024 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003025 }
3026}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003027
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003028PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003029"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003030\n\
3031Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003032old replaced by new. If the optional argument count is\n\
3033given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003034
3035static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003036string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003037{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003038 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003039 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003040 const char *from_s, *to_s;
3041 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003042
Thomas Wouters477c8d52006-05-27 19:21:47 +00003043 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003045
Thomas Wouters477c8d52006-05-27 19:21:47 +00003046 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003047 from_s = PyString_AS_STRING(from);
3048 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003049 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003050#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003051 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003052 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003053 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003054#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003055 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003056 return NULL;
3057
Thomas Wouters477c8d52006-05-27 19:21:47 +00003058 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003059 to_s = PyString_AS_STRING(to);
3060 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003061 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003062#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003063 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003064 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003065 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003066#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003067 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003068 return NULL;
3069
Thomas Wouters477c8d52006-05-27 19:21:47 +00003070 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003071 from_s, from_len,
3072 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003073}
3074
Thomas Wouters477c8d52006-05-27 19:21:47 +00003075/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003076
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003077/* Matches the end (direction >= 0) or start (direction < 0) of self
3078 * against substr, using the start and end arguments. Returns
3079 * -1 on error, 0 if not found and 1 if found.
3080 */
3081Py_LOCAL(int)
3082_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3083 Py_ssize_t end, int direction)
3084{
3085 Py_ssize_t len = PyString_GET_SIZE(self);
3086 Py_ssize_t slen;
3087 const char* sub;
3088 const char* str;
3089
3090 if (PyString_Check(substr)) {
3091 sub = PyString_AS_STRING(substr);
3092 slen = PyString_GET_SIZE(substr);
3093 }
3094#ifdef Py_USING_UNICODE
3095 else if (PyUnicode_Check(substr))
3096 return PyUnicode_Tailmatch((PyObject *)self,
3097 substr, start, end, direction);
3098#endif
3099 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3100 return -1;
3101 str = PyString_AS_STRING(self);
3102
3103 string_adjust_indices(&start, &end, len);
3104
3105 if (direction < 0) {
3106 /* startswith */
3107 if (start+slen > len)
3108 return 0;
3109 } else {
3110 /* endswith */
3111 if (end-start < slen || start > len)
3112 return 0;
3113
3114 if (end-slen > start)
3115 start = end - slen;
3116 }
3117 if (end-start >= slen)
3118 return ! memcmp(str+start, sub, slen);
3119 return 0;
3120}
3121
3122
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003123PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003124"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003125\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003126Return True if S starts with the specified prefix, False otherwise.\n\
3127With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003128With optional end, stop comparing S at that position.\n\
3129prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003130
3131static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003132string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003133{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003134 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003135 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003137 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003138
Guido van Rossumc6821402000-05-08 14:08:05 +00003139 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3140 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003142 if (PyTuple_Check(subobj)) {
3143 Py_ssize_t i;
3144 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3145 result = _string_tailmatch(self,
3146 PyTuple_GET_ITEM(subobj, i),
3147 start, end, -1);
3148 if (result == -1)
3149 return NULL;
3150 else if (result) {
3151 Py_RETURN_TRUE;
3152 }
3153 }
3154 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003156 result = _string_tailmatch(self, subobj, start, end, -1);
3157 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003159 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003160 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003161}
3162
3163
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003164PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003165"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003166\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003167Return True if S ends with the specified suffix, False otherwise.\n\
3168With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003169With optional end, stop comparing S at that position.\n\
3170suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003171
3172static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003173string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003174{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003175 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003176 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003177 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003178 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179
Guido van Rossumc6821402000-05-08 14:08:05 +00003180 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3181 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003183 if (PyTuple_Check(subobj)) {
3184 Py_ssize_t i;
3185 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3186 result = _string_tailmatch(self,
3187 PyTuple_GET_ITEM(subobj, i),
3188 start, end, +1);
3189 if (result == -1)
3190 return NULL;
3191 else if (result) {
3192 Py_RETURN_TRUE;
3193 }
3194 }
3195 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003197 result = _string_tailmatch(self, subobj, start, end, +1);
3198 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003200 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003201 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003202}
3203
3204
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003205PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003206"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003207\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003208Encodes S using the codec registered for encoding. encoding defaults\n\
3209to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003210handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003211a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3212'xmlcharrefreplace' as well as any other name registered with\n\
3213codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003214
3215static PyObject *
3216string_encode(PyStringObject *self, PyObject *args)
3217{
3218 char *encoding = NULL;
3219 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003220 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003221
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003222 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3223 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003224 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003225 if (v == NULL)
3226 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003227 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3228 PyErr_Format(PyExc_TypeError,
3229 "encoder did not return a string/unicode object "
3230 "(type=%.400s)",
3231 v->ob_type->tp_name);
3232 Py_DECREF(v);
3233 return NULL;
3234 }
3235 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003236
3237 onError:
3238 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003239}
3240
3241
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003242PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003243"S.decode([encoding[,errors]]) -> object\n\
3244\n\
3245Decodes S using the codec registered for encoding. encoding defaults\n\
3246to the default encoding. errors may be given to set a different error\n\
3247handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003248a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3249as well as any other name registerd with codecs.register_error that is\n\
3250able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003251
3252static PyObject *
3253string_decode(PyStringObject *self, PyObject *args)
3254{
3255 char *encoding = NULL;
3256 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003257 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003258
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003259 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3260 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003261 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003262 if (v == NULL)
3263 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003264 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3265 PyErr_Format(PyExc_TypeError,
3266 "decoder did not return a string/unicode object "
3267 "(type=%.400s)",
3268 v->ob_type->tp_name);
3269 Py_DECREF(v);
3270 return NULL;
3271 }
3272 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003273
3274 onError:
3275 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003276}
3277
3278
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003279PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003280"S.expandtabs([tabsize]) -> string\n\
3281\n\
3282Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003283If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003284
3285static PyObject*
3286string_expandtabs(PyStringObject *self, PyObject *args)
3287{
3288 const char *e, *p;
3289 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003290 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003291 PyObject *u;
3292 int tabsize = 8;
3293
3294 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3295 return NULL;
3296
Thomas Wouters7e474022000-07-16 12:04:32 +00003297 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003298 i = j = 0;
3299 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3300 for (p = PyString_AS_STRING(self); p < e; p++)
3301 if (*p == '\t') {
3302 if (tabsize > 0)
3303 j += tabsize - (j % tabsize);
3304 }
3305 else {
3306 j++;
3307 if (*p == '\n' || *p == '\r') {
3308 i += j;
3309 j = 0;
3310 }
3311 }
3312
3313 /* Second pass: create output string and fill it */
3314 u = PyString_FromStringAndSize(NULL, i + j);
3315 if (!u)
3316 return NULL;
3317
3318 j = 0;
3319 q = PyString_AS_STRING(u);
3320
3321 for (p = PyString_AS_STRING(self); p < e; p++)
3322 if (*p == '\t') {
3323 if (tabsize > 0) {
3324 i = tabsize - (j % tabsize);
3325 j += i;
3326 while (i--)
3327 *q++ = ' ';
3328 }
3329 }
3330 else {
3331 j++;
3332 *q++ = *p;
3333 if (*p == '\n' || *p == '\r')
3334 j = 0;
3335 }
3336
3337 return u;
3338}
3339
Thomas Wouters477c8d52006-05-27 19:21:47 +00003340Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003341pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003342{
3343 PyObject *u;
3344
3345 if (left < 0)
3346 left = 0;
3347 if (right < 0)
3348 right = 0;
3349
Tim Peters8fa5dd02001-09-12 02:18:30 +00003350 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003351 Py_INCREF(self);
3352 return (PyObject *)self;
3353 }
3354
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003355 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003356 left + PyString_GET_SIZE(self) + right);
3357 if (u) {
3358 if (left)
3359 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003360 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003361 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003362 PyString_GET_SIZE(self));
3363 if (right)
3364 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3365 fill, right);
3366 }
3367
3368 return u;
3369}
3370
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003371PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003372"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003373"\n"
3374"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003375"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003376
3377static PyObject *
3378string_ljust(PyStringObject *self, PyObject *args)
3379{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003380 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003381 char fillchar = ' ';
3382
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003383 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384 return NULL;
3385
Tim Peters8fa5dd02001-09-12 02:18:30 +00003386 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 Py_INCREF(self);
3388 return (PyObject*) self;
3389 }
3390
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003391 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003392}
3393
3394
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003395PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003396"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003397"\n"
3398"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003399"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003400
3401static PyObject *
3402string_rjust(PyStringObject *self, PyObject *args)
3403{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003404 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003405 char fillchar = ' ';
3406
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003407 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003408 return NULL;
3409
Tim Peters8fa5dd02001-09-12 02:18:30 +00003410 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003411 Py_INCREF(self);
3412 return (PyObject*) self;
3413 }
3414
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003415 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003416}
3417
3418
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003419PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003420"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003421"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003422"Return S centered in a string of length width. Padding is\n"
3423"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003424
3425static PyObject *
3426string_center(PyStringObject *self, PyObject *args)
3427{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003428 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003429 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003430 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003432 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433 return NULL;
3434
Tim Peters8fa5dd02001-09-12 02:18:30 +00003435 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003436 Py_INCREF(self);
3437 return (PyObject*) self;
3438 }
3439
3440 marg = width - PyString_GET_SIZE(self);
3441 left = marg / 2 + (marg & width & 1);
3442
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003443 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444}
3445
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003446PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003447"S.zfill(width) -> string\n"
3448"\n"
3449"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003450"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003451
3452static PyObject *
3453string_zfill(PyStringObject *self, PyObject *args)
3454{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003455 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003456 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003457 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003458 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003459
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003460 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003461 return NULL;
3462
3463 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003464 if (PyString_CheckExact(self)) {
3465 Py_INCREF(self);
3466 return (PyObject*) self;
3467 }
3468 else
3469 return PyString_FromStringAndSize(
3470 PyString_AS_STRING(self),
3471 PyString_GET_SIZE(self)
3472 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003473 }
3474
3475 fill = width - PyString_GET_SIZE(self);
3476
3477 s = pad(self, fill, 0, '0');
3478
3479 if (s == NULL)
3480 return NULL;
3481
3482 p = PyString_AS_STRING(s);
3483 if (p[fill] == '+' || p[fill] == '-') {
3484 /* move sign to beginning of string */
3485 p[0] = p[fill];
3486 p[fill] = '0';
3487 }
3488
3489 return (PyObject*) s;
3490}
3491
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003492PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003493"S.isspace() -> bool\n\
3494\n\
3495Return True if all characters in S are whitespace\n\
3496and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003497
3498static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003499string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500{
Fred Drakeba096332000-07-09 07:04:36 +00003501 register const unsigned char *p
3502 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003503 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003504
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505 /* Shortcut for single character strings */
3506 if (PyString_GET_SIZE(self) == 1 &&
3507 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003508 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003510 /* Special case for empty strings */
3511 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003512 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003513
Guido van Rossum4c08d552000-03-10 22:55:18 +00003514 e = p + PyString_GET_SIZE(self);
3515 for (; p < e; p++) {
3516 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003517 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003519 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003520}
3521
3522
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003523PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003524"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003525\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003526Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003527and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003528
3529static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003530string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003531{
Fred Drakeba096332000-07-09 07:04:36 +00003532 register const unsigned char *p
3533 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003534 register const unsigned char *e;
3535
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003536 /* Shortcut for single character strings */
3537 if (PyString_GET_SIZE(self) == 1 &&
3538 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003539 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003540
3541 /* Special case for empty strings */
3542 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003543 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003544
3545 e = p + PyString_GET_SIZE(self);
3546 for (; p < e; p++) {
3547 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003548 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003549 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003551}
3552
3553
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003554PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003555"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003556\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003557Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003558and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003559
3560static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003561string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003562{
Fred Drakeba096332000-07-09 07:04:36 +00003563 register const unsigned char *p
3564 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565 register const unsigned char *e;
3566
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003567 /* Shortcut for single character strings */
3568 if (PyString_GET_SIZE(self) == 1 &&
3569 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003570 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571
3572 /* Special case for empty strings */
3573 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003574 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003575
3576 e = p + PyString_GET_SIZE(self);
3577 for (; p < e; p++) {
3578 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003579 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003580 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582}
3583
3584
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003585PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003587\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003588Return True if all characters in S are digits\n\
3589and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003590
3591static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003592string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003593{
Fred Drakeba096332000-07-09 07:04:36 +00003594 register const unsigned char *p
3595 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003596 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003597
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598 /* Shortcut for single character strings */
3599 if (PyString_GET_SIZE(self) == 1 &&
3600 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003601 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003602
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003603 /* Special case for empty strings */
3604 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003605 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003606
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607 e = p + PyString_GET_SIZE(self);
3608 for (; p < e; p++) {
3609 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003611 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003613}
3614
3615
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003616PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003620at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621
3622static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003623string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624{
Fred Drakeba096332000-07-09 07:04:36 +00003625 register const unsigned char *p
3626 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003627 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628 int cased;
3629
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630 /* Shortcut for single character strings */
3631 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003632 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003634 /* Special case for empty strings */
3635 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003636 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003637
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638 e = p + PyString_GET_SIZE(self);
3639 cased = 0;
3640 for (; p < e; p++) {
3641 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643 else if (!cased && islower(*p))
3644 cased = 1;
3645 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003646 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647}
3648
3649
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003650PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003651"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003652\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003653Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003654at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655
3656static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003657string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658{
Fred Drakeba096332000-07-09 07:04:36 +00003659 register const unsigned char *p
3660 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003661 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662 int cased;
3663
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664 /* Shortcut for single character strings */
3665 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003666 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003668 /* Special case for empty strings */
3669 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003671
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672 e = p + PyString_GET_SIZE(self);
3673 cased = 0;
3674 for (; p < e; p++) {
3675 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677 else if (!cased && isupper(*p))
3678 cased = 1;
3679 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681}
3682
3683
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003684PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003685"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003687Return True if S is a titlecased string and there is at least one\n\
3688character in S, i.e. uppercase characters may only follow uncased\n\
3689characters and lowercase characters only cased ones. Return False\n\
3690otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691
3692static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003693string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694{
Fred Drakeba096332000-07-09 07:04:36 +00003695 register const unsigned char *p
3696 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003697 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698 int cased, previous_is_cased;
3699
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700 /* Shortcut for single character strings */
3701 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003702 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003704 /* Special case for empty strings */
3705 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003707
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708 e = p + PyString_GET_SIZE(self);
3709 cased = 0;
3710 previous_is_cased = 0;
3711 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003712 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003713
3714 if (isupper(ch)) {
3715 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717 previous_is_cased = 1;
3718 cased = 1;
3719 }
3720 else if (islower(ch)) {
3721 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003722 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003723 previous_is_cased = 1;
3724 cased = 1;
3725 }
3726 else
3727 previous_is_cased = 0;
3728 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003729 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730}
3731
3732
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003733PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003734"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735\n\
3736Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003737Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003738is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740static PyObject*
3741string_splitlines(PyStringObject *self, PyObject *args)
3742{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003743 register Py_ssize_t i;
3744 register Py_ssize_t j;
3745 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003746 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 PyObject *list;
3748 PyObject *str;
3749 char *data;
3750
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003751 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752 return NULL;
3753
3754 data = PyString_AS_STRING(self);
3755 len = PyString_GET_SIZE(self);
3756
Thomas Wouters477c8d52006-05-27 19:21:47 +00003757 /* This does not use the preallocated list because splitlines is
3758 usually run with hundreds of newlines. The overhead of
3759 switching between PyList_SET_ITEM and append causes about a
3760 2-3% slowdown for that common case. A smarter implementation
3761 could move the if check out, so the SET_ITEMs are done first
3762 and the appends only done when the prealloc buffer is full.
3763 That's too much work for little gain.*/
3764
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765 list = PyList_New(0);
3766 if (!list)
3767 goto onError;
3768
3769 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003770 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003771
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772 /* Find a line and append it */
3773 while (i < len && data[i] != '\n' && data[i] != '\r')
3774 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003775
3776 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003777 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778 if (i < len) {
3779 if (data[i] == '\r' && i + 1 < len &&
3780 data[i+1] == '\n')
3781 i += 2;
3782 else
3783 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003784 if (keepends)
3785 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003787 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003788 j = i;
3789 }
3790 if (j < len) {
3791 SPLIT_APPEND(data, j, len);
3792 }
3793
3794 return list;
3795
3796 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003797 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003798 return NULL;
3799}
3800
/* Drop the helper macros used by the split family of methods. */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003806static PyObject *
3807string_getnewargs(PyStringObject *v)
3808{
3809 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3810}
3811
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003812
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003813static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003814string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003815 /* Counterparts of the obsolete stropmodule functions; except
3816 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003817 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3818 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003819 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003820 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3821 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003822 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3823 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3824 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3825 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3826 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3827 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3828 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003829 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3830 capitalize__doc__},
3831 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3832 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3833 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003834 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003835 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3836 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3837 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3838 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3839 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3840 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3841 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003842 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3843 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003844 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3845 startswith__doc__},
3846 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3847 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3848 swapcase__doc__},
3849 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3850 translate__doc__},
3851 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3852 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3853 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3854 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3855 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3856 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3857 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3858 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3859 expandtabs__doc__},
3860 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3861 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003862 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003863 {NULL, NULL} /* sentinel */
3864};
3865
Jeremy Hylton938ace62002-07-17 16:30:39 +00003866static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003867str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3868
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003869static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003870string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003871{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003872 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003873 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003874
Guido van Rossumae960af2001-08-30 03:11:59 +00003875 if (type != &PyString_Type)
3876 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003877 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3878 return NULL;
3879 if (x == NULL)
3880 return PyString_FromString("");
3881 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003882}
3883
Guido van Rossumae960af2001-08-30 03:11:59 +00003884static PyObject *
3885str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3886{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003887 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003888 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003889
3890 assert(PyType_IsSubtype(type, &PyString_Type));
3891 tmp = string_new(&PyString_Type, args, kwds);
3892 if (tmp == NULL)
3893 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003894 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003895 n = PyString_GET_SIZE(tmp);
3896 pnew = type->tp_alloc(type, n);
3897 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003898 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003899 ((PyStringObject *)pnew)->ob_shash =
3900 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003901 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003902 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003903 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003904 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003905}
3906
Guido van Rossumcacfc072002-05-24 19:01:59 +00003907static PyObject *
3908basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3909{
3910 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003911 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003912 return NULL;
3913}
3914
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003915static PyObject *
3916string_mod(PyObject *v, PyObject *w)
3917{
3918 if (!PyString_Check(v)) {
3919 Py_INCREF(Py_NotImplemented);
3920 return Py_NotImplemented;
3921 }
3922 return PyString_Format(v, w);
3923}
3924
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003925PyDoc_STRVAR(basestring_doc,
3926"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003927
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003928static PyNumberMethods string_as_number = {
3929 0, /*nb_add*/
3930 0, /*nb_subtract*/
3931 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003932 string_mod, /*nb_remainder*/
3933};
3934
3935
Guido van Rossumcacfc072002-05-24 19:01:59 +00003936PyTypeObject PyBaseString_Type = {
3937 PyObject_HEAD_INIT(&PyType_Type)
3938 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003939 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003940 0,
3941 0,
3942 0, /* tp_dealloc */
3943 0, /* tp_print */
3944 0, /* tp_getattr */
3945 0, /* tp_setattr */
3946 0, /* tp_compare */
3947 0, /* tp_repr */
3948 0, /* tp_as_number */
3949 0, /* tp_as_sequence */
3950 0, /* tp_as_mapping */
3951 0, /* tp_hash */
3952 0, /* tp_call */
3953 0, /* tp_str */
3954 0, /* tp_getattro */
3955 0, /* tp_setattro */
3956 0, /* tp_as_buffer */
3957 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3958 basestring_doc, /* tp_doc */
3959 0, /* tp_traverse */
3960 0, /* tp_clear */
3961 0, /* tp_richcompare */
3962 0, /* tp_weaklistoffset */
3963 0, /* tp_iter */
3964 0, /* tp_iternext */
3965 0, /* tp_methods */
3966 0, /* tp_members */
3967 0, /* tp_getset */
3968 &PyBaseObject_Type, /* tp_base */
3969 0, /* tp_dict */
3970 0, /* tp_descr_get */
3971 0, /* tp_descr_set */
3972 0, /* tp_dictoffset */
3973 0, /* tp_init */
3974 0, /* tp_alloc */
3975 basestring_new, /* tp_new */
3976 0, /* tp_free */
3977};
3978
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003979PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003980"str(object) -> string\n\
3981\n\
3982Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003983If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003984
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003985static PyObject *str_iter(PyObject *seq);
3986
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003987PyTypeObject PyString_Type = {
3988 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003989 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003990 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003992 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003993 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003994 (printfunc)string_print, /* tp_print */
3995 0, /* tp_getattr */
3996 0, /* tp_setattr */
3997 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003998 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003999 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004000 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004001 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004002 (hashfunc)string_hash, /* tp_hash */
4003 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004004 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004005 PyObject_GenericGetAttr, /* tp_getattro */
4006 0, /* tp_setattro */
4007 &string_as_buffer, /* tp_as_buffer */
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00004008 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004009 string_doc, /* tp_doc */
4010 0, /* tp_traverse */
4011 0, /* tp_clear */
4012 (richcmpfunc)string_richcompare, /* tp_richcompare */
4013 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004014 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004015 0, /* tp_iternext */
4016 string_methods, /* tp_methods */
4017 0, /* tp_members */
4018 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004019 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004020 0, /* tp_dict */
4021 0, /* tp_descr_get */
4022 0, /* tp_descr_set */
4023 0, /* tp_dictoffset */
4024 0, /* tp_init */
4025 0, /* tp_alloc */
4026 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004027 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004028};
4029
4030void
Fred Drakeba096332000-07-09 07:04:36 +00004031PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004032{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004033 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004034 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004035 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004036 if (w == NULL || !PyString_Check(*pv)) {
4037 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004038 *pv = NULL;
4039 return;
4040 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004041 v = string_concat((PyStringObject *) *pv, w);
4042 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004043 *pv = v;
4044}
4045
Guido van Rossum013142a1994-08-30 08:19:36 +00004046void
Fred Drakeba096332000-07-09 07:04:36 +00004047PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004048{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004049 PyString_Concat(pv, w);
4050 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004051}
4052
4053
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004054/* The following function breaks the notion that strings are immutable:
4055 it changes the size of a string. We get away with this only if there
4056 is only one module referencing the object. You can also think of it
4057 as creating a new string object and destroying the old one, only
4058 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004059 already be known to some other part of the code...
4060 Note that if there's not enough memory to resize the string, the original
4061 string object at *pv is deallocated, *pv is set to NULL, an "out of
4062 memory" exception is set, and -1 is returned. Else (on success) 0 is
4063 returned, and the value in *pv may or may not be the same as on input.
4064 As always, an extra byte is allocated for a trailing \0 byte (newsize
4065 does *not* include that), and a trailing \0 byte is stored.
4066*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004067
4068int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004069_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004070{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004071 register PyObject *v;
4072 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004073 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004074 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4075 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004076 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004077 Py_DECREF(v);
4078 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004079 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004080 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004081 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004082 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 _Py_ForgetReference(v);
4084 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004085 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004086 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004087 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004088 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004089 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004090 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004091 _Py_NewReference(*pv);
4092 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004093 sv->ob_size = newsize;
4094 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004095 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004096 return 0;
4097}
Guido van Rossume5372401993-03-16 12:15:04 +00004098
4099/* Helpers for formatstring */
4100
Thomas Wouters477c8d52006-05-27 19:21:47 +00004101Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004102getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004103{
Thomas Wouters977485d2006-02-16 15:59:12 +00004104 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004105 if (argidx < arglen) {
4106 (*p_argidx)++;
4107 if (arglen < 0)
4108 return args;
4109 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004110 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004111 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004112 PyErr_SetString(PyExc_TypeError,
4113 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004114 return NULL;
4115}
4116
/* Format codes: one bit per format flag character.
 *   flag      char
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4129
Thomas Wouters477c8d52006-05-27 19:21:47 +00004130Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004131formatfloat(char *buf, size_t buflen, int flags,
4132 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004133{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004134 /* fmt = '%#.' + `prec` + `type`
4135 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004136 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004137 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004138 x = PyFloat_AsDouble(v);
4139 if (x == -1.0 && PyErr_Occurred()) {
4140 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004141 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004142 }
Guido van Rossume5372401993-03-16 12:15:04 +00004143 if (prec < 0)
4144 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004145 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4146 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004147 /* Worst case length calc to ensure no buffer overrun:
4148
4149 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004150 fmt = %#.<prec>g
4151 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004152 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004153 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004154
4155 'f' formats:
4156 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4157 len = 1 + 50 + 1 + prec = 52 + prec
4158
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004159 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004160 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004161
4162 */
4163 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4164 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004165 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004166 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004167 return -1;
4168 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004169 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4170 (flags&F_ALT) ? "#" : "",
4171 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004172 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004173 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004174}
4175
Tim Peters38fd5b62000-09-21 05:43:11 +00004176/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4177 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4178 * Python's regular ints.
4179 * Return value: a new PyString*, or NULL if error.
4180 * . *pbuf is set to point into it,
4181 * *plen set to the # of chars following that.
4182 * Caller must decref it when done using pbuf.
4183 * The string starting at *pbuf is of the form
4184 * "-"? ("0x" | "0X")? digit+
4185 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004186 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004187 * There will be at least prec digits, zero-filled on the left if
4188 * necessary to get that many.
4189 * val object to be converted
4190 * flags bitmask of format flags; only F_ALT is looked at
4191 * prec minimum number of digits; 0-fill on left if needed
4192 * type a character in [duoxX]; u acts the same as d
4193 *
4194 * CAUTION: o, x and X conversions on regular ints can never
4195 * produce a '-' sign, but can for Python's unbounded ints.
4196 */
4197PyObject*
4198_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4199 char **pbuf, int *plen)
4200{
4201 PyObject *result = NULL;
4202 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004203 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004204 int sign; /* 1 if '-', else 0 */
4205 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004206 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004207 int numdigits; /* len == numnondigits + numdigits */
4208 int numnondigits = 0;
4209
4210 switch (type) {
4211 case 'd':
4212 case 'u':
4213 result = val->ob_type->tp_str(val);
4214 break;
4215 case 'o':
4216 result = val->ob_type->tp_as_number->nb_oct(val);
4217 break;
4218 case 'x':
4219 case 'X':
4220 numnondigits = 2;
4221 result = val->ob_type->tp_as_number->nb_hex(val);
4222 break;
4223 default:
4224 assert(!"'type' not in [duoxX]");
4225 }
4226 if (!result)
4227 return NULL;
4228
4229 /* To modify the string in-place, there can only be one reference. */
4230 if (result->ob_refcnt != 1) {
4231 PyErr_BadInternalCall();
4232 return NULL;
4233 }
4234 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004235 llen = PyString_Size(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004236 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004237 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4238 return NULL;
4239 }
4240 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004241 if (buf[len-1] == 'L') {
4242 --len;
4243 buf[len] = '\0';
4244 }
4245 sign = buf[0] == '-';
4246 numnondigits += sign;
4247 numdigits = len - numnondigits;
4248 assert(numdigits > 0);
4249
Tim Petersfff53252001-04-12 18:38:48 +00004250 /* Get rid of base marker unless F_ALT */
4251 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004252 /* Need to skip 0x, 0X or 0. */
4253 int skipped = 0;
4254 switch (type) {
4255 case 'o':
4256 assert(buf[sign] == '0');
4257 /* If 0 is only digit, leave it alone. */
4258 if (numdigits > 1) {
4259 skipped = 1;
4260 --numdigits;
4261 }
4262 break;
4263 case 'x':
4264 case 'X':
4265 assert(buf[sign] == '0');
4266 assert(buf[sign + 1] == 'x');
4267 skipped = 2;
4268 numnondigits -= 2;
4269 break;
4270 }
4271 if (skipped) {
4272 buf += skipped;
4273 len -= skipped;
4274 if (sign)
4275 buf[0] = '-';
4276 }
4277 assert(len == numnondigits + numdigits);
4278 assert(numdigits > 0);
4279 }
4280
4281 /* Fill with leading zeroes to meet minimum width. */
4282 if (prec > numdigits) {
4283 PyObject *r1 = PyString_FromStringAndSize(NULL,
4284 numnondigits + prec);
4285 char *b1;
4286 if (!r1) {
4287 Py_DECREF(result);
4288 return NULL;
4289 }
4290 b1 = PyString_AS_STRING(r1);
4291 for (i = 0; i < numnondigits; ++i)
4292 *b1++ = *buf++;
4293 for (i = 0; i < prec - numdigits; i++)
4294 *b1++ = '0';
4295 for (i = 0; i < numdigits; i++)
4296 *b1++ = *buf++;
4297 *b1 = '\0';
4298 Py_DECREF(result);
4299 result = r1;
4300 buf = PyString_AS_STRING(result);
4301 len = numnondigits + prec;
4302 }
4303
4304 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004305 if (type == 'X') {
4306 /* Need to convert all lower case letters to upper case.
4307 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004308 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004309 if (buf[i] >= 'a' && buf[i] <= 'x')
4310 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004311 }
4312 *pbuf = buf;
4313 *plen = len;
4314 return result;
4315}
4316
Thomas Wouters477c8d52006-05-27 19:21:47 +00004317Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004318formatint(char *buf, size_t buflen, int flags,
4319 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004320{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004321 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004322 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4323 + 1 + 1 = 24 */
4324 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004325 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004326 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004327
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004328 x = PyInt_AsLong(v);
4329 if (x == -1 && PyErr_Occurred()) {
4330 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004331 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004332 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004333 if (x < 0 && type == 'u') {
4334 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004335 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004336 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4337 sign = "-";
4338 else
4339 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004340 if (prec < 0)
4341 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004342
4343 if ((flags & F_ALT) &&
4344 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004345 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004346 * of issues that cause pain:
4347 * - when 0 is being converted, the C standard leaves off
4348 * the '0x' or '0X', which is inconsistent with other
4349 * %#x/%#X conversions and inconsistent with Python's
4350 * hex() function
4351 * - there are platforms that violate the standard and
4352 * convert 0 with the '0x' or '0X'
4353 * (Metrowerks, Compaq Tru64)
4354 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004355 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004356 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004357 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004358 * We can achieve the desired consistency by inserting our
4359 * own '0x' or '0X' prefix, and substituting %x/%X in place
4360 * of %#x/%#X.
4361 *
4362 * Note that this is the same approach as used in
4363 * formatint() in unicodeobject.c
4364 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004365 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4366 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004367 }
4368 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004369 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4370 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004371 prec, type);
4372 }
4373
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004374 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4375 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004376 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004377 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004378 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004379 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004380 return -1;
4381 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004382 if (sign[0])
4383 PyOS_snprintf(buf, buflen, fmt, -x);
4384 else
4385 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004386 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004387}
4388
Thomas Wouters477c8d52006-05-27 19:21:47 +00004389Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004390formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004391{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004392 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004393 if (PyString_Check(v)) {
4394 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004395 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004396 }
4397 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004398 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004399 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004400 }
4401 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004402 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004403}
4404
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004405/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4406
4407 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4408 chars are formatted. XXX This is a magic number. Each formatting
4409 routine does bounds checking to ensure no overflow, but a better
4410 solution may be to malloc a buffer of appropriate size for each
4411 format. For now, the current solution is sufficient.
4412*/
4413#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004414
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004415PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004416PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004417{
4418 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004419 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004420 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004421 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004422 PyObject *result, *orig_args;
4423#ifdef Py_USING_UNICODE
4424 PyObject *v, *w;
4425#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004426 PyObject *dict = NULL;
4427 if (format == NULL || !PyString_Check(format) || args == NULL) {
4428 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004429 return NULL;
4430 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004431 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004432 fmt = PyString_AS_STRING(format);
4433 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004434 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004435 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004436 if (result == NULL)
4437 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004438 res = PyString_AsString(result);
4439 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004440 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004441 argidx = 0;
4442 }
4443 else {
4444 arglen = -1;
4445 argidx = -2;
4446 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004447 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4448 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004449 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004450 while (--fmtcnt >= 0) {
4451 if (*fmt != '%') {
4452 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004453 rescnt = fmtcnt + 100;
4454 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004455 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004456 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004457 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004458 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004459 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004460 }
4461 *res++ = *fmt++;
4462 }
4463 else {
4464 /* Got a format specifier */
4465 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004466 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004467 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004468 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004469 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004470 PyObject *v = NULL;
4471 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004472 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004473 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004474 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004475 char formatbuf[FORMATBUFLEN];
4476 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004477#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004478 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004479 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004480#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004481
Guido van Rossumda9c2711996-12-05 21:58:58 +00004482 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004483 if (*fmt == '(') {
4484 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004485 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004486 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004487 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004488
4489 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004490 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004491 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004492 goto error;
4493 }
4494 ++fmt;
4495 --fmtcnt;
4496 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004497 /* Skip over balanced parentheses */
4498 while (pcount > 0 && --fmtcnt >= 0) {
4499 if (*fmt == ')')
4500 --pcount;
4501 else if (*fmt == '(')
4502 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004503 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004504 }
4505 keylen = fmt - keystart - 1;
4506 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004507 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004508 "incomplete format key");
4509 goto error;
4510 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 key = PyString_FromStringAndSize(keystart,
4512 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004513 if (key == NULL)
4514 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004515 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004516 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004517 args_owned = 0;
4518 }
4519 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004521 if (args == NULL) {
4522 goto error;
4523 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004524 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004525 arglen = -1;
4526 argidx = -2;
4527 }
Guido van Rossume5372401993-03-16 12:15:04 +00004528 while (--fmtcnt >= 0) {
4529 switch (c = *fmt++) {
4530 case '-': flags |= F_LJUST; continue;
4531 case '+': flags |= F_SIGN; continue;
4532 case ' ': flags |= F_BLANK; continue;
4533 case '#': flags |= F_ALT; continue;
4534 case '0': flags |= F_ZERO; continue;
4535 }
4536 break;
4537 }
4538 if (c == '*') {
4539 v = getnextarg(args, arglen, &argidx);
4540 if (v == NULL)
4541 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004542 if (!PyInt_Check(v)) {
4543 PyErr_SetString(PyExc_TypeError,
4544 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004545 goto error;
4546 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004547 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004548 if (width < 0) {
4549 flags |= F_LJUST;
4550 width = -width;
4551 }
Guido van Rossume5372401993-03-16 12:15:04 +00004552 if (--fmtcnt >= 0)
4553 c = *fmt++;
4554 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004555 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004556 width = c - '0';
4557 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004558 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004559 if (!isdigit(c))
4560 break;
4561 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004562 PyErr_SetString(
4563 PyExc_ValueError,
4564 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004565 goto error;
4566 }
4567 width = width*10 + (c - '0');
4568 }
4569 }
4570 if (c == '.') {
4571 prec = 0;
4572 if (--fmtcnt >= 0)
4573 c = *fmt++;
4574 if (c == '*') {
4575 v = getnextarg(args, arglen, &argidx);
4576 if (v == NULL)
4577 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004578 if (!PyInt_Check(v)) {
4579 PyErr_SetString(
4580 PyExc_TypeError,
4581 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004582 goto error;
4583 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004584 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004585 if (prec < 0)
4586 prec = 0;
4587 if (--fmtcnt >= 0)
4588 c = *fmt++;
4589 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004590 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004591 prec = c - '0';
4592 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004593 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004594 if (!isdigit(c))
4595 break;
4596 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004597 PyErr_SetString(
4598 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004599 "prec too big");
4600 goto error;
4601 }
4602 prec = prec*10 + (c - '0');
4603 }
4604 }
4605 } /* prec */
4606 if (fmtcnt >= 0) {
4607 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004608 if (--fmtcnt >= 0)
4609 c = *fmt++;
4610 }
4611 }
4612 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004613 PyErr_SetString(PyExc_ValueError,
4614 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004615 goto error;
4616 }
4617 if (c != '%') {
4618 v = getnextarg(args, arglen, &argidx);
4619 if (v == NULL)
4620 goto error;
4621 }
4622 sign = 0;
4623 fill = ' ';
4624 switch (c) {
4625 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004626 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004627 len = 1;
4628 break;
4629 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004630#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004631 if (PyUnicode_Check(v)) {
4632 fmt = fmt_start;
4633 argidx = argidx_start;
4634 goto unicode;
4635 }
Georg Brandld45014b2005-10-01 17:06:00 +00004636#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004637 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004638#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004639 if (temp != NULL && PyUnicode_Check(temp)) {
4640 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004641 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004642 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004643 goto unicode;
4644 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004645#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004646 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004647 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004648 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004649 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004650 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004651 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004652 if (!PyString_Check(temp)) {
4653 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004654 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004655 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004656 goto error;
4657 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004658 pbuf = PyString_AS_STRING(temp);
4659 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004660 if (prec >= 0 && len > prec)
4661 len = prec;
4662 break;
4663 case 'i':
4664 case 'd':
4665 case 'u':
4666 case 'o':
4667 case 'x':
4668 case 'X':
4669 if (c == 'i')
4670 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004671 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004672 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004673 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004674 prec, c, &pbuf, &ilen);
4675 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004676 if (!temp)
4677 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004678 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004679 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004680 else {
4681 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004682 len = formatint(pbuf,
4683 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004684 flags, prec, c, v);
4685 if (len < 0)
4686 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004687 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004688 }
4689 if (flags & F_ZERO)
4690 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004691 break;
4692 case 'e':
4693 case 'E':
4694 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004695 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004696 case 'g':
4697 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004698 if (c == 'F')
4699 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004700 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004701 len = formatfloat(pbuf, sizeof(formatbuf),
4702 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004703 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004704 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004705 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004706 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004707 fill = '0';
4708 break;
4709 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004710#ifdef Py_USING_UNICODE
4711 if (PyUnicode_Check(v)) {
4712 fmt = fmt_start;
4713 argidx = argidx_start;
4714 goto unicode;
4715 }
4716#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004717 pbuf = formatbuf;
4718 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004719 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004720 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004721 break;
4722 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004723 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004724 "unsupported format character '%c' (0x%x) "
4725 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004726 c, c,
4727 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004728 goto error;
4729 }
4730 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004731 if (*pbuf == '-' || *pbuf == '+') {
4732 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004733 len--;
4734 }
4735 else if (flags & F_SIGN)
4736 sign = '+';
4737 else if (flags & F_BLANK)
4738 sign = ' ';
4739 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004740 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004741 }
4742 if (width < len)
4743 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004744 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004745 reslen -= rescnt;
4746 rescnt = width + fmtcnt + 100;
4747 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004748 if (reslen < 0) {
4749 Py_DECREF(result);
4750 return PyErr_NoMemory();
4751 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004752 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004753 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004754 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004755 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004756 }
4757 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004758 if (fill != ' ')
4759 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004760 rescnt--;
4761 if (width > len)
4762 width--;
4763 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004764 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4765 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004766 assert(pbuf[1] == c);
4767 if (fill != ' ') {
4768 *res++ = *pbuf++;
4769 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004770 }
Tim Petersfff53252001-04-12 18:38:48 +00004771 rescnt -= 2;
4772 width -= 2;
4773 if (width < 0)
4774 width = 0;
4775 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004776 }
4777 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004778 do {
4779 --rescnt;
4780 *res++ = fill;
4781 } while (--width > len);
4782 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004783 if (fill == ' ') {
4784 if (sign)
4785 *res++ = sign;
4786 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004787 (c == 'x' || c == 'X')) {
4788 assert(pbuf[0] == '0');
4789 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004790 *res++ = *pbuf++;
4791 *res++ = *pbuf++;
4792 }
4793 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004794 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004795 res += len;
4796 rescnt -= len;
4797 while (--width >= len) {
4798 --rescnt;
4799 *res++ = ' ';
4800 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004801 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004802 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004803 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004804 goto error;
4805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004806 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004807 } /* '%' */
4808 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004809 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004810 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004811 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004812 goto error;
4813 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004814 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004815 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004816 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004817 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004818 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004819
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004820#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004821 unicode:
4822 if (args_owned) {
4823 Py_DECREF(args);
4824 args_owned = 0;
4825 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004826 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004827 if (PyTuple_Check(orig_args) && argidx > 0) {
4828 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004829 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004830 v = PyTuple_New(n);
4831 if (v == NULL)
4832 goto error;
4833 while (--n >= 0) {
4834 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4835 Py_INCREF(w);
4836 PyTuple_SET_ITEM(v, n, w);
4837 }
4838 args = v;
4839 } else {
4840 Py_INCREF(orig_args);
4841 args = orig_args;
4842 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004843 args_owned = 1;
4844 /* Take what we have of the result and let the Unicode formatting
4845 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004846 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004847 if (_PyString_Resize(&result, rescnt))
4848 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004849 fmtcnt = PyString_GET_SIZE(format) - \
4850 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004851 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4852 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004853 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004854 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004855 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004856 if (v == NULL)
4857 goto error;
4858 /* Paste what we have (result) to what the Unicode formatting
4859 function returned (v) and return the result (or error) */
4860 w = PyUnicode_Concat(result, v);
4861 Py_DECREF(result);
4862 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004863 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004864 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004865#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004866
Guido van Rossume5372401993-03-16 12:15:04 +00004867 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004868 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004869 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004870 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004871 }
Guido van Rossume5372401993-03-16 12:15:04 +00004872 return NULL;
4873}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004874
Guido van Rossum2a61e741997-01-18 07:55:05 +00004875void
Fred Drakeba096332000-07-09 07:04:36 +00004876PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004877{
4878 register PyStringObject *s = (PyStringObject *)(*p);
4879 PyObject *t;
4880 if (s == NULL || !PyString_Check(s))
4881 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004882 /* If it's a string subclass, we don't really know what putting
4883 it in the interned dict might do. */
4884 if (!PyString_CheckExact(s))
4885 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004886 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004887 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004888 if (interned == NULL) {
4889 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004890 if (interned == NULL) {
4891 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004892 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004893 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004894 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004895 t = PyDict_GetItem(interned, (PyObject *)s);
4896 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004897 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004898 Py_DECREF(*p);
4899 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004900 return;
4901 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004902
Armin Rigo79f7ad22004-08-07 19:27:39 +00004903 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004904 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004905 return;
4906 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004907 /* The two references in interned are not counted by refcnt.
4908 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004909 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004910 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004911}
4912
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004913void
4914PyString_InternImmortal(PyObject **p)
4915{
4916 PyString_InternInPlace(p);
4917 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4918 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4919 Py_INCREF(*p);
4920 }
4921}
4922
Guido van Rossum2a61e741997-01-18 07:55:05 +00004923
4924PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004925PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004926{
4927 PyObject *s = PyString_FromString(cp);
4928 if (s == NULL)
4929 return NULL;
4930 PyString_InternInPlace(&s);
4931 return s;
4932}
4933
Guido van Rossum8cf04761997-08-02 02:57:45 +00004934void
Fred Drakeba096332000-07-09 07:04:36 +00004935PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004936{
4937 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004938 for (i = 0; i < UCHAR_MAX + 1; i++) {
4939 Py_XDECREF(characters[i]);
4940 characters[i] = NULL;
4941 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004942 Py_XDECREF(nullstring);
4943 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004944}
Barry Warsawa903ad982001-02-23 16:40:48 +00004945
Barry Warsawa903ad982001-02-23 16:40:48 +00004946void _Py_ReleaseInternedStrings(void)
4947{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004948 PyObject *keys;
4949 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004950 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004951
4952 if (interned == NULL || !PyDict_Check(interned))
4953 return;
4954 keys = PyDict_Keys(interned);
4955 if (keys == NULL || !PyList_Check(keys)) {
4956 PyErr_Clear();
4957 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004958 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004959
4960 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4961 detector, interned strings are not forcibly deallocated; rather, we
4962 give them their stolen references back, and then clear and DECREF
4963 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004964
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004965 fprintf(stderr, "releasing interned strings\n");
4966 n = PyList_GET_SIZE(keys);
4967 for (i = 0; i < n; i++) {
4968 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4969 switch (s->ob_sstate) {
4970 case SSTATE_NOT_INTERNED:
4971 /* XXX Shouldn't happen */
4972 break;
4973 case SSTATE_INTERNED_IMMORTAL:
4974 s->ob_refcnt += 1;
4975 break;
4976 case SSTATE_INTERNED_MORTAL:
4977 s->ob_refcnt += 2;
4978 break;
4979 default:
4980 Py_FatalError("Inconsistent interned string state.");
4981 }
4982 s->ob_sstate = SSTATE_NOT_INTERNED;
4983 }
4984 Py_DECREF(keys);
4985 PyDict_Clear(interned);
4986 Py_DECREF(interned);
4987 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004988}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004989
4990
4991/*********************** Str Iterator ****************************/
4992
4993typedef struct {
4994 PyObject_HEAD
4995 long it_index;
4996 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4997} striterobject;
4998
4999static void
5000striter_dealloc(striterobject *it)
5001{
5002 _PyObject_GC_UNTRACK(it);
5003 Py_XDECREF(it->it_seq);
5004 PyObject_GC_Del(it);
5005}
5006
5007static int
5008striter_traverse(striterobject *it, visitproc visit, void *arg)
5009{
5010 Py_VISIT(it->it_seq);
5011 return 0;
5012}
5013
5014static PyObject *
5015striter_next(striterobject *it)
5016{
5017 PyStringObject *seq;
5018 PyObject *item;
5019
5020 assert(it != NULL);
5021 seq = it->it_seq;
5022 if (seq == NULL)
5023 return NULL;
5024 assert(PyString_Check(seq));
5025
5026 if (it->it_index < PyString_GET_SIZE(seq)) {
5027 item = PyString_FromStringAndSize(PyString_AS_STRING(seq)+it->it_index, 1);
5028 if (item != NULL)
5029 ++it->it_index;
5030 return item;
5031 }
5032
5033 Py_DECREF(seq);
5034 it->it_seq = NULL;
5035 return NULL;
5036}
5037
5038static PyObject *
5039striter_len(striterobject *it)
5040{
5041 Py_ssize_t len = 0;
5042 if (it->it_seq)
5043 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5044 return PyInt_FromSsize_t(len);
5045}
5046
5047PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
5048
5049static PyMethodDef striter_methods[] = {
5050 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS, length_hint_doc},
5051 {NULL, NULL} /* sentinel */
5052};
5053
5054PyTypeObject PyStringIter_Type = {
5055 PyObject_HEAD_INIT(&PyType_Type)
5056 0, /* ob_size */
5057 "striterator", /* tp_name */
5058 sizeof(striterobject), /* tp_basicsize */
5059 0, /* tp_itemsize */
5060 /* methods */
5061 (destructor)striter_dealloc, /* tp_dealloc */
5062 0, /* tp_print */
5063 0, /* tp_getattr */
5064 0, /* tp_setattr */
5065 0, /* tp_compare */
5066 0, /* tp_repr */
5067 0, /* tp_as_number */
5068 0, /* tp_as_sequence */
5069 0, /* tp_as_mapping */
5070 0, /* tp_hash */
5071 0, /* tp_call */
5072 0, /* tp_str */
5073 PyObject_GenericGetAttr, /* tp_getattro */
5074 0, /* tp_setattro */
5075 0, /* tp_as_buffer */
5076 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5077 0, /* tp_doc */
5078 (traverseproc)striter_traverse, /* tp_traverse */
5079 0, /* tp_clear */
5080 0, /* tp_richcompare */
5081 0, /* tp_weaklistoffset */
5082 PyObject_SelfIter, /* tp_iter */
5083 (iternextfunc)striter_next, /* tp_iternext */
5084 striter_methods, /* tp_methods */
5085 0,
5086};
5087
5088static PyObject *
5089str_iter(PyObject *seq)
5090{
5091 striterobject *it;
5092
5093 if (!PyString_Check(seq)) {
5094 PyErr_BadInternalCall();
5095 return NULL;
5096 }
5097 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5098 if (it == NULL)
5099 return NULL;
5100 it->it_index = 0;
5101 Py_INCREF(seq);
5102 it->it_seq = (PyStringObject *)seq;
5103 _PyObject_GC_TRACK(it);
5104 return (PyObject *)it;
5105}