blob: 94943f60e7290cfce81cb23ef80ce4f3c28fd989 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
   For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Thomas Wouters477c8d52006-05-27 19:21:47 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Thomas Wouters477c8d52006-05-27 19:21:47 +0000769/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770/* Methods */
771
Thomas Wouters477c8d52006-05-27 19:21:47 +0000772#define STRINGLIB_CHAR char
773
774#define STRINGLIB_CMP memcmp
775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
778
779#define STRINGLIB_EMPTY nullstring
780
781#include "stringlib/fastsearch.h"
782
783#include "stringlib/count.h"
784#include "stringlib/find.h"
785#include "stringlib/partition.h"
786
787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000807 char *data = op->ob_sval;
808 Py_ssize_t size = op->ob_size;
809 while (size > INT_MAX) {
810 /* Very long strings cannot be written atomically.
811 * But don't write exactly INT_MAX bytes at a time
812 * to avoid memory aligment issues.
813 */
814 const int chunk_size = INT_MAX & ~0x3FFF;
815 fwrite(data, 1, chunk_size, fp);
816 data += chunk_size;
817 size -= chunk_size;
818 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000819#ifdef __VMS
Thomas Wouters89f507f2006-12-13 04:49:30 +0000820 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#else
Thomas Wouters89f507f2006-12-13 04:49:30 +0000822 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000824 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826
Thomas Wouters7e474022000-07-16 12:04:32 +0000827 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000829 if (memchr(op->ob_sval, '\'', op->ob_size) &&
830 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 quote = '"';
832
833 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 for (i = 0; i < op->ob_size; i++) {
835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000837 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\r");
844 else if (c < ' ' || c >= 0x7f)
845 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000850 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851}
852
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000853PyObject *
854PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000856 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000857 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000858 PyObject *v;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000859 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyErr_SetString(PyExc_OverflowError,
861 "string is too large to make repr");
862 }
863 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000865 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 }
867 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000868 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 register char c;
870 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000871 int quote;
872
Thomas Wouters7e474022000-07-16 12:04:32 +0000873 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 quote = '\'';
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000875 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000876 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000877 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000878 quote = '"';
879
Tim Peters9161c8b2001-12-03 01:55:38 +0000880 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 /* There's at least enough room for a hex escape
884 and a closing quote. */
885 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000889 else if (c == '\t')
890 *p++ = '\\', *p++ = 't';
891 else if (c == '\n')
892 *p++ = '\\', *p++ = 'n';
893 else if (c == '\r')
894 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000895 else if (c < ' ' || c >= 0x7f) {
896 /* For performance, we don't want to call
897 PyOS_snprintf here (extra layers of
898 function call). */
899 sprintf(p, "\\x%02x", c & 0xff);
900 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000901 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000902 else
903 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000905 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000906 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 _PyString_Resize(
Thomas Woutersd4ec0c32006-04-21 16:44:05 +0000909 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000910 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912}
913
Guido van Rossum189f1df2001-05-01 16:51:53 +0000914static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000915string_repr(PyObject *op)
916{
917 return PyString_Repr(op, 1);
918}
919
920static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921string_str(PyObject *s)
922{
Tim Petersc9933152001-10-16 20:18:24 +0000923 assert(PyString_Check(s));
924 if (PyString_CheckExact(s)) {
925 Py_INCREF(s);
926 return s;
927 }
928 else {
929 /* Subtype -- return genuine string with the same value. */
930 PyStringObject *t = (PyStringObject *) s;
931 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
932 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000933}
934
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000936string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
938 return a->ob_size;
939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000942string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000944 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 register PyStringObject *op;
946 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 if (PyUnicode_Check(bb))
949 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#endif
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000951 if (PyBytes_Check(bb))
952 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000953 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000954 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000955 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000956 return NULL;
957 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000959 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000960 if ((a->ob_size == 0 || b->ob_size == 0) &&
961 PyString_CheckExact(a) && PyString_CheckExact(b)) {
962 if (a->ob_size == 0) {
963 Py_INCREF(bb);
964 return bb;
965 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000966 Py_INCREF(a);
967 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 }
969 size = a->ob_size + b->ob_size;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000970 if (size < 0) {
971 PyErr_SetString(PyExc_OverflowError,
972 "strings are too large to concat");
973 return NULL;
974 }
975
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000976 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000977 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000978 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000979 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000980 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000981 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000982 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000983 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
984 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000985 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987#undef b
988}
989
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000990static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000991string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000993 register Py_ssize_t i;
994 register Py_ssize_t j;
995 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000997 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 if (n < 0)
999 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001000 /* watch out for overflows: the size can overflow int,
1001 * and the # of bytes needed can overflow size_t
1002 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001004 if (n && size / n != a->ob_size) {
1005 PyErr_SetString(PyExc_OverflowError,
1006 "repeated string is too long");
1007 return NULL;
1008 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001009 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010 Py_INCREF(a);
1011 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001012 }
Tim Peterse7c05322004-06-27 17:24:49 +00001013 nbytes = (size_t)size;
1014 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001015 PyErr_SetString(PyExc_OverflowError,
1016 "repeated string is too long");
1017 return NULL;
1018 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001020 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001021 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001022 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001023 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001024 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001025 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001026 op->ob_sval[size] = '\0';
1027 if (a->ob_size == 1 && n > 0) {
1028 memset(op->ob_sval, a->ob_sval[0] , n);
1029 return (PyObject *) op;
1030 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001031 i = 0;
1032 if (i < size) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001033 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001035 }
1036 while (i < size) {
1037 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001038 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001039 i += j;
1040 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001041 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042}
1043
1044/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1045
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001047string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001048 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001049 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050{
1051 if (i < 0)
1052 i = 0;
1053 if (j < 0)
1054 j = 0; /* Avoid signed/unsigned bug in next line */
1055 if (j > a->ob_size)
1056 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001057 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1058 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001059 Py_INCREF(a);
1060 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001061 }
1062 if (j < i)
1063 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001064 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001065}
1066
Guido van Rossum9284a572000-03-07 15:53:43 +00001067static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001068string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001069{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001070 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001071#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00001072 if (PyUnicode_Check(sub_obj))
1073 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001074#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001075 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001076 PyErr_Format(PyExc_TypeError,
1077 "'in <string>' requires string as left operand, "
1078 "not %.200s", sub_obj->ob_type->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001079 return -1;
1080 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001081 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001082
Thomas Wouters477c8d52006-05-27 19:21:47 +00001083 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001084}
1085
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001086static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001087string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001089 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001092 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093 return NULL;
1094 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001095 pchar = a->ob_sval[i];
1096 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001097 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001098 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001099 else {
1100#ifdef COUNT_ALLOCS
1101 one_strings++;
1102#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001103 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001104 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001105 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001106}
1107
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108static PyObject*
1109string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112 Py_ssize_t len_a, len_b;
1113 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114 PyObject *result;
1115
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001116 /* Make sure both arguments are strings. */
1117 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001118 result = Py_NotImplemented;
1119 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001120 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001121 if (a == b) {
1122 switch (op) {
1123 case Py_EQ:case Py_LE:case Py_GE:
1124 result = Py_True;
1125 goto out;
1126 case Py_NE:case Py_LT:case Py_GT:
1127 result = Py_False;
1128 goto out;
1129 }
1130 }
1131 if (op == Py_EQ) {
1132 /* Supporting Py_NE here as well does not save
1133 much time, since Py_NE is rarely used. */
1134 if (a->ob_size == b->ob_size
1135 && (a->ob_sval[0] == b->ob_sval[0]
Thomas Wouters27d517b2007-02-25 20:39:11 +00001136 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001137 result = Py_True;
1138 } else {
1139 result = Py_False;
1140 }
1141 goto out;
1142 }
1143 len_a = a->ob_size; len_b = b->ob_size;
1144 min_len = (len_a < len_b) ? len_a : len_b;
1145 if (min_len > 0) {
1146 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1147 if (c==0)
1148 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001149 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001150 c = 0;
1151 if (c == 0)
1152 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1153 switch (op) {
1154 case Py_LT: c = c < 0; break;
1155 case Py_LE: c = c <= 0; break;
1156 case Py_EQ: assert(0); break; /* unreachable */
1157 case Py_NE: c = c != 0; break;
1158 case Py_GT: c = c > 0; break;
1159 case Py_GE: c = c >= 0; break;
1160 default:
1161 result = Py_NotImplemented;
1162 goto out;
1163 }
1164 result = c ? Py_True : Py_False;
1165 out:
1166 Py_INCREF(result);
1167 return result;
1168}
1169
1170int
1171_PyString_Eq(PyObject *o1, PyObject *o2)
1172{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001173 PyStringObject *a = (PyStringObject*) o1;
1174 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001175 return a->ob_size == b->ob_size
1176 && *a->ob_sval == *b->ob_sval
1177 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001178}
1179
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180static long
Fred Drakeba096332000-07-09 07:04:36 +00001181string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001182{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001183 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 register unsigned char *p;
1185 register long x;
1186
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001187 if (a->ob_shash != -1)
1188 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001189 len = a->ob_size;
1190 p = (unsigned char *) a->ob_sval;
1191 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001192 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001193 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001194 x ^= a->ob_size;
1195 if (x == -1)
1196 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001197 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001198 return x;
1199}
1200
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201static PyObject*
1202string_subscript(PyStringObject* self, PyObject* item)
1203{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001204 if (PyIndex_Check(item)) {
1205 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 if (i == -1 && PyErr_Occurred())
1207 return NULL;
1208 if (i < 0)
1209 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001210 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001211 }
1212 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001213 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214 char* source_buf;
1215 char* result_buf;
1216 PyObject* result;
1217
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001218 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 PyString_GET_SIZE(self),
1220 &start, &stop, &step, &slicelength) < 0) {
1221 return NULL;
1222 }
1223
1224 if (slicelength <= 0) {
1225 return PyString_FromStringAndSize("", 0);
1226 }
1227 else {
1228 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001229 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001230 if (result_buf == NULL)
1231 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001233 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 cur += step, i++) {
1235 result_buf[i] = source_buf[cur];
1236 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001237
1238 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001239 slicelength);
1240 PyMem_Free(result_buf);
1241 return result;
1242 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001243 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001245 PyErr_Format(PyExc_TypeError,
1246 "string indices must be integers, not %.200s",
1247 item->ob_type->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001248 return NULL;
1249 }
1250}
1251
Martin v. Löwis18e16552006-02-15 17:27:45 +00001252static Py_ssize_t
1253string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254{
1255 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001256 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001257 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258 return -1;
1259 }
1260 *ptr = (void *)self->ob_sval;
1261 return self->ob_size;
1262}
1263
Martin v. Löwis18e16552006-02-15 17:27:45 +00001264static Py_ssize_t
1265string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001266{
Guido van Rossum045e6881997-09-08 18:30:11 +00001267 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001268 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001269 return -1;
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( lenp )
1276 *lenp = self->ob_size;
1277 return 1;
1278}
1279
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280static Py_ssize_t
1281string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001282{
1283 if ( index != 0 ) {
1284 PyErr_SetString(PyExc_SystemError,
1285 "accessing non-existent string segment");
1286 return -1;
1287 }
1288 *ptr = self->ob_sval;
1289 return self->ob_size;
1290}
1291
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001292static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001294 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001295 (ssizeargfunc)string_repeat, /*sq_repeat*/
1296 (ssizeargfunc)string_item, /*sq_item*/
1297 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001298 0, /*sq_ass_item*/
1299 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001300 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001301};
1302
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001303static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001304 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001305 (binaryfunc)string_subscript,
1306 0,
1307};
1308
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001309static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001310 (readbufferproc)string_buffer_getreadbuf,
1311 (writebufferproc)string_buffer_getwritebuf,
1312 (segcountproc)string_buffer_getsegcount,
1313 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001314};
1315
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001316
1317
/* Selectors for the three strip variants.  Each value is also the index
   of the matching entry in stripformat[] below. */
1318#define LEFTSTRIP 0
1319#define RIGHTSTRIP 1
1320#define BOTHSTRIP 2
1321
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001322/* Arrays indexed by above */
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001323static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1324
/* Skip the three-character "|O:" prefix of the format string, leaving
   just the method name ("lstrip", "rstrip" or "strip"). */
1325#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001326
Thomas Wouters477c8d52006-05-27 19:21:47 +00001327
/* Nonzero when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared inline before memcmp() is called on the bytes in between.

   Don't call if length < 2: the middle comparison passes length-2 to
   memcmp(), which would go negative.

   Every argument is evaluated more than once, so arguments must not have
   side effects.  They are parenthesized so that arbitrary expressions can
   be passed safely. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
	((target)[offset] == (pattern)[0] && \
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1333
1334
1335/* Overallocate the initial list to reduce the number of reallocs for small
1336 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1337 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1338 text (roughly 11 words per line) and field delimited data (usually 1-10
1339 fields). For large strings the split algorithms are bandwidth limited
1340 so increasing the preallocation likely will not improve things.*/
1341
/* Upper bound on the number of list slots preallocated for a split. */
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   splits: maxsplit+1 elements, capped at MAX_PREALLOC.
   5 splits gives 6 elements.
   The argument is parenthesized so that any expression can be passed; it
   is evaluated twice, so it must not have side effects. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1347
/* Append the substring data[left:right] to the caller's `list` as a new
   string object.  This is not a self-contained expression: it assigns to
   the caller's local `str`, and jumps to the caller's `onError` label if
   creating the string or appending it fails.  The reference held in `str`
   is released again in both outcomes of PyList_Append().
   NOTE: the expansion is an unbraced sequence of statements, so it must
   not be used as the sole body of an unbraced if/else/loop. */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001348#define SPLIT_APPEND(data, left, right) \
1349 str = PyString_FromStringAndSize((data) + (left), \
1350 (right) - (left)); \
1351 if (str == NULL) \
1352 goto onError; \
1353 if (PyList_Append(list, str)) { \
1354 Py_DECREF(str); \
1355 goto onError; \
1356 } \
1357 else \
1358 Py_DECREF(str);
1359
/* Add the substring data[left:right] to the caller's `list` and bump the
   caller's `count`.  While count < MAX_PREALLOC the new string is stored
   directly into slot `count` with PyList_SET_ITEM() and is not DECREF'ed
   here; after that it is appended with PyList_Append() and the local
   reference is released.  Uses the caller's `str`, `list` and `count`
   variables and jumps to the caller's `onError` label on failure.  The
   expansion is a braced block. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001360#define SPLIT_ADD(data, left, right) { \
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001361 str = PyString_FromStringAndSize((data) + (left), \
1362 (right) - (left)); \
1363 if (str == NULL) \
1364 goto onError; \
Thomas Wouters477c8d52006-05-27 19:21:47 +00001365 if (count < MAX_PREALLOC) { \
1366 PyList_SET_ITEM(list, count, str); \
1367 } else { \
1368 if (PyList_Append(list, str)) { \
1369 Py_DECREF(str); \
1370 goto onError; \
1371 } \
1372 else \
1373 Py_DECREF(str); \
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001374 } \
Thomas Wouters477c8d52006-05-27 19:21:47 +00001375 count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376
Thomas Wouters477c8d52006-05-27 19:21:47 +00001377/* Always force the list to the expected size. */
/* Overwrites the list's ob_size field with the caller's local `count`. */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001378#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001379
/* Scanning helpers.  Each one modifies the index variable `i` in place.
   SKIP_SPACE / SKIP_NONSPACE move i forward while s[i] is / is not
   whitespace according to isspace(), stopping at len.
   RSKIP_SPACE / RSKIP_NONSPACE move i backward under the same tests,
   stopping once i drops below 0. */
1380#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1381#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1382#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1383#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1384
1385Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001386split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001388 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001389 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001390 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391
1392 if (list == NULL)
1393 return NULL;
1394
Thomas Wouters477c8d52006-05-27 19:21:47 +00001395 i = j = 0;
1396
1397 while (maxsplit-- > 0) {
1398 SKIP_SPACE(s, i, len);
1399 if (i==len) break;
1400 j = i; i++;
1401 SKIP_NONSPACE(s, i, len);
1402 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001404
1405 if (i < len) {
1406 /* Only occurs when maxsplit was reached */
1407 /* Skip any remaining whitespace and copy to end of string */
1408 SKIP_SPACE(s, i, len);
1409 if (i != len)
1410 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001412 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 Py_DECREF(list);
1416 return NULL;
1417}
1418
Thomas Wouters477c8d52006-05-27 19:21:47 +00001419Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001420split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001422 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001424 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425
1426 if (list == NULL)
1427 return NULL;
1428
Thomas Wouters477c8d52006-05-27 19:21:47 +00001429 i = j = 0;
1430 while ((j < len) && (maxcount-- > 0)) {
1431 for(; j<len; j++) {
1432 /* I found that using memchr makes no difference */
1433 if (s[j] == ch) {
1434 SPLIT_ADD(s, i, j);
1435 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001437 }
1438 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001439 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001440 if (i <= len) {
1441 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001443 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444 return list;
1445
1446 onError:
1447 Py_DECREF(list);
1448 return NULL;
1449}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001451PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452"S.split([sep [,maxsplit]]) -> list of strings\n\
1453\n\
1454Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001456splits are done. If sep is not specified or is None, any\n\
1457whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458
1459static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001460string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001462 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001463 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001465 PyObject *list, *str, *subobj = Py_None;
1466#ifdef USE_FAST
1467 Py_ssize_t pos;
1468#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001470 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001473 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001476 if (PyString_Check(subobj)) {
1477 sub = PyString_AS_STRING(subobj);
1478 n = PyString_GET_SIZE(subobj);
1479 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001480#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 else if (PyUnicode_Check(subobj))
1482 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001483#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001484 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1485 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487 if (n == 0) {
1488 PyErr_SetString(PyExc_ValueError, "empty separator");
1489 return NULL;
1490 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001491 else if (n == 1)
1492 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493
Thomas Wouters477c8d52006-05-27 19:21:47 +00001494 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 if (list == NULL)
1496 return NULL;
1497
Thomas Wouters477c8d52006-05-27 19:21:47 +00001498#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001500 while (maxsplit-- > 0) {
1501 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1502 if (pos < 0)
1503 break;
1504 j = i+pos;
1505 SPLIT_ADD(s, i, j);
1506 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001508#else
1509 i = j = 0;
1510 while ((j+n <= len) && (maxsplit-- > 0)) {
1511 for (; j+n <= len; j++) {
1512 if (Py_STRING_MATCH(s, j, sub, n)) {
1513 SPLIT_ADD(s, i, j);
1514 i = j = j + n;
1515 break;
1516 }
1517 }
1518 }
1519#endif
1520 SPLIT_ADD(s, i, len);
1521 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return list;
1523
Thomas Wouters477c8d52006-05-27 19:21:47 +00001524 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525 Py_DECREF(list);
1526 return NULL;
1527}
1528
Thomas Wouters477c8d52006-05-27 19:21:47 +00001529PyDoc_STRVAR(partition__doc__,
1530"S.partition(sep) -> (head, sep, tail)\n\
1531\n\
1532Searches for the separator sep in S, and returns the part before it,\n\
1533the separator itself, and the part after it. If the separator is not\n\
1534found, returns S and two empty strings.");
1535
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001536static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001537string_partition(PyStringObject *self, PyObject *sep_obj)
1538{
1539 const char *sep;
1540 Py_ssize_t sep_len;
1541
1542 if (PyString_Check(sep_obj)) {
1543 sep = PyString_AS_STRING(sep_obj);
1544 sep_len = PyString_GET_SIZE(sep_obj);
1545 }
1546#ifdef Py_USING_UNICODE
1547 else if (PyUnicode_Check(sep_obj))
1548 return PyUnicode_Partition((PyObject *) self, sep_obj);
1549#endif
1550 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1551 return NULL;
1552
1553 return stringlib_partition(
1554 (PyObject*) self,
1555 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1556 sep_obj, sep, sep_len
1557 );
1558}
1559
1560PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001561"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001562\n\
1563Searches for the separator sep in S, starting at the end of S, and returns\n\
1564the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001565separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566
1567static PyObject *
1568string_rpartition(PyStringObject *self, PyObject *sep_obj)
1569{
1570 const char *sep;
1571 Py_ssize_t sep_len;
1572
1573 if (PyString_Check(sep_obj)) {
1574 sep = PyString_AS_STRING(sep_obj);
1575 sep_len = PyString_GET_SIZE(sep_obj);
1576 }
1577#ifdef Py_USING_UNICODE
1578 else if (PyUnicode_Check(sep_obj))
1579 return PyUnicode_Partition((PyObject *) self, sep_obj);
1580#endif
1581 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1582 return NULL;
1583
1584 return stringlib_rpartition(
1585 (PyObject*) self,
1586 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587 sep_obj, sep, sep_len
1588 );
1589}
1590
1591Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001592rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001593{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001594 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001595 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001596 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597
1598 if (list == NULL)
1599 return NULL;
1600
Thomas Wouters477c8d52006-05-27 19:21:47 +00001601 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001602
Thomas Wouters477c8d52006-05-27 19:21:47 +00001603 while (maxsplit-- > 0) {
1604 RSKIP_SPACE(s, i);
1605 if (i<0) break;
1606 j = i; i--;
1607 RSKIP_NONSPACE(s, i);
1608 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001609 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001610 if (i >= 0) {
1611 /* Only occurs when maxsplit was reached */
1612 /* Skip any remaining whitespace and copy to beginning of string */
1613 RSKIP_SPACE(s, i);
1614 if (i >= 0)
1615 SPLIT_ADD(s, 0, i + 1);
1616
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001618 FIX_PREALLOC_SIZE(list);
1619 if (PyList_Reverse(list) < 0)
1620 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001622 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001623 Py_DECREF(list);
1624 return NULL;
1625}
1626
Thomas Wouters477c8d52006-05-27 19:21:47 +00001627Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001628rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001630 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001632 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633
1634 if (list == NULL)
1635 return NULL;
1636
Thomas Wouters477c8d52006-05-27 19:21:47 +00001637 i = j = len - 1;
1638 while ((i >= 0) && (maxcount-- > 0)) {
1639 for (; i >= 0; i--) {
1640 if (s[i] == ch) {
1641 SPLIT_ADD(s, i + 1, j + 1);
1642 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001644 }
1645 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001646 }
1647 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001648 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001649 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001650 FIX_PREALLOC_SIZE(list);
1651 if (PyList_Reverse(list) < 0)
1652 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653 return list;
1654
1655 onError:
1656 Py_DECREF(list);
1657 return NULL;
1658}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659
1660PyDoc_STRVAR(rsplit__doc__,
1661"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1662\n\
1663Return a list of the words in the string S, using sep as the\n\
1664delimiter string, starting at the end of the string and working\n\
1665to the front. If maxsplit is given, at most maxsplit splits are\n\
1666done. If sep is not specified or is None, any whitespace string\n\
1667is a separator.");
1668
1669static PyObject *
1670string_rsplit(PyStringObject *self, PyObject *args)
1671{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001673 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001675 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001677 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 return NULL;
1679 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001680 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681 if (subobj == Py_None)
1682 return rsplit_whitespace(s, len, maxsplit);
1683 if (PyString_Check(subobj)) {
1684 sub = PyString_AS_STRING(subobj);
1685 n = PyString_GET_SIZE(subobj);
1686 }
1687#ifdef Py_USING_UNICODE
1688 else if (PyUnicode_Check(subobj))
1689 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1690#endif
1691 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1692 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001693
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 if (n == 0) {
1695 PyErr_SetString(PyExc_ValueError, "empty separator");
1696 return NULL;
1697 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001698 else if (n == 1)
1699 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700
Thomas Wouters477c8d52006-05-27 19:21:47 +00001701 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702 if (list == NULL)
1703 return NULL;
1704
1705 j = len;
1706 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001707
Thomas Wouters477c8d52006-05-27 19:21:47 +00001708 while ( (i >= 0) && (maxsplit-- > 0) ) {
1709 for (; i>=0; i--) {
1710 if (Py_STRING_MATCH(s, i, sub, n)) {
1711 SPLIT_ADD(s, i + n, j);
1712 j = i;
1713 i -= n;
1714 break;
1715 }
1716 }
1717 }
1718 SPLIT_ADD(s, 0, j);
1719 FIX_PREALLOC_SIZE(list);
1720 if (PyList_Reverse(list) < 0)
1721 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 return list;
1723
Thomas Wouters477c8d52006-05-27 19:21:47 +00001724onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001725 Py_DECREF(list);
1726 return NULL;
1727}
1728
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001730PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731"S.join(sequence) -> string\n\
1732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001734sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735
1736static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001737string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738{
1739 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001740 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001745 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001746 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 seq = PySequence_Fast(orig, "");
1749 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001750 return NULL;
1751 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001752
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001753 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001754 if (seqlen == 0) {
1755 Py_DECREF(seq);
1756 return PyString_FromString("");
1757 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001759 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001760 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1761 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001763 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001768 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001769 * Do a pre-pass to figure out the total amount of space we'll
1770 * need (sz), see whether any argument is absurd, and defer to
1771 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775 item = PySequence_Fast_GET_ITEM(seq, i);
1776 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001777#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001779 /* Defer to Unicode join.
1780 * CAUTION: There's no gurantee that the
1781 * original sequence can be iterated over
1782 * again, so we must pass seq here.
1783 */
1784 PyObject *result;
1785 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001786 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001787 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001789#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001791 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001792 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001793 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 Py_DECREF(seq);
1795 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001796 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001797 sz += PyString_GET_SIZE(item);
1798 if (i != 0)
1799 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001800 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001802 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001803 Py_DECREF(seq);
1804 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 }
1807
1808 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001809 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 if (res == NULL) {
1811 Py_DECREF(seq);
1812 return NULL;
1813 }
1814
1815 /* Catenate everything. */
1816 p = PyString_AS_STRING(res);
1817 for (i = 0; i < seqlen; ++i) {
1818 size_t n;
1819 item = PySequence_Fast_GET_ITEM(seq, i);
1820 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001821 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 p += n;
1823 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001824 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001825 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001828
Jeremy Hylton49048292000-07-11 03:28:17 +00001829 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831}
1832
Tim Peters52e155e2001-06-16 05:42:57 +00001833PyObject *
1834_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001835{
Tim Petersa7259592001-06-16 05:11:17 +00001836 assert(sep != NULL && PyString_Check(sep));
1837 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001839}
1840
Thomas Wouters477c8d52006-05-27 19:21:47 +00001841Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001843{
1844 if (*end > len)
1845 *end = len;
1846 else if (*end < 0)
1847 *end += len;
1848 if (*end < 0)
1849 *end = 0;
1850 if (*start < 0)
1851 *start += len;
1852 if (*start < 0)
1853 *start = 0;
1854}
1855
Thomas Wouters477c8d52006-05-27 19:21:47 +00001856Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001857string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001860 const char *sub;
1861 Py_ssize_t sub_len;
1862 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863
Thomas Wouters477c8d52006-05-27 19:21:47 +00001864 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1865 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 return -2;
1867 if (PyString_Check(subobj)) {
1868 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001869 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001871#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001872 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001873 return PyUnicode_Find(
1874 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001875#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001876 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001877 /* XXX - the "expected a character buffer object" is pretty
1878 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 return -2;
1880
Thomas Wouters477c8d52006-05-27 19:21:47 +00001881 if (dir > 0)
1882 return stringlib_find_slice(
1883 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1884 sub, sub_len, start, end);
1885 else
1886 return stringlib_rfind_slice(
1887 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1888 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889}
1890
1891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001892PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893"S.find(sub [,start [,end]]) -> int\n\
1894\n\
1895Return the lowest index in S where substring sub is found,\n\
1896such that sub is contained within s[start,end]. Optional\n\
1897arguments start and end are interpreted as in slice notation.\n\
1898\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900
1901static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001902string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001904 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905 if (result == -2)
1906 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908}
1909
1910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912"S.index(sub [,start [,end]]) -> int\n\
1913\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001914Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915
1916static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001917string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001919 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920 if (result == -2)
1921 return NULL;
1922 if (result == -1) {
1923 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001924 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 return NULL;
1926 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928}
1929
1930
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001931PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932"S.rfind(sub [,start [,end]]) -> int\n\
1933\n\
1934Return the highest index in S where substring sub is found,\n\
1935such that sub is contained within s[start,end]. Optional\n\
1936arguments start and end are interpreted as in slice notation.\n\
1937\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001938Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939
1940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001941string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001943 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 if (result == -2)
1945 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947}
1948
1949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951"S.rindex(sub [,start [,end]]) -> int\n\
1952\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001953Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954
1955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001956string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001958 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 if (result == -2)
1960 return NULL;
1961 if (result == -1) {
1962 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001963 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 return NULL;
1965 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967}
1968
1969
Thomas Wouters477c8d52006-05-27 19:21:47 +00001970Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1972{
1973 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001976 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1977 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001978
1979 i = 0;
1980 if (striptype != RIGHTSTRIP) {
1981 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1982 i++;
1983 }
1984 }
1985
1986 j = len;
1987 if (striptype != LEFTSTRIP) {
1988 do {
1989 j--;
1990 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1991 j++;
1992 }
1993
1994 if (i == 0 && j == len && PyString_CheckExact(self)) {
1995 Py_INCREF(self);
1996 return (PyObject*)self;
1997 }
1998 else
1999 return PyString_FromStringAndSize(s+i, j-i);
2000}
2001
2002
Thomas Wouters477c8d52006-05-27 19:21:47 +00002003Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002004do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005{
2006 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009 i = 0;
2010 if (striptype != RIGHTSTRIP) {
2011 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2012 i++;
2013 }
2014 }
2015
2016 j = len;
2017 if (striptype != LEFTSTRIP) {
2018 do {
2019 j--;
2020 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2021 j++;
2022 }
2023
Tim Peters8fa5dd02001-09-12 02:18:30 +00002024 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025 Py_INCREF(self);
2026 return (PyObject*)self;
2027 }
2028 else
2029 return PyString_FromStringAndSize(s+i, j-i);
2030}
2031
2032
Thomas Wouters477c8d52006-05-27 19:21:47 +00002033Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002034do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2035{
2036 PyObject *sep = NULL;
2037
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002038 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002039 return NULL;
2040
2041 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002042 if (PyString_Check(sep))
2043 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002044#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002045 else if (PyUnicode_Check(sep)) {
2046 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2047 PyObject *res;
2048 if (uniself==NULL)
2049 return NULL;
2050 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2051 striptype, sep);
2052 Py_DECREF(uniself);
2053 return res;
2054 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002056 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#ifdef Py_USING_UNICODE
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002058 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#else
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002060 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002061#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002062 STRIPNAME(striptype));
2063 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002064 }
2065
2066 return do_strip(self, striptype);
2067}
2068
2069
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002070PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002071"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072\n\
2073Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002074whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002075If chars is given and not None, remove characters in chars instead.\n\
2076If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077
2078static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081 if (PyTuple_GET_SIZE(args) == 0)
2082 return do_strip(self, BOTHSTRIP); /* Common case */
2083 else
2084 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085}
2086
2087
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002088PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002089"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002091Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002092If chars is given and not None, remove characters in chars instead.\n\
2093If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094
2095static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098 if (PyTuple_GET_SIZE(args) == 0)
2099 return do_strip(self, LEFTSTRIP); /* Common case */
2100 else
2101 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102}
2103
2104
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002105PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002106"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002108Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002109If chars is given and not None, remove characters in chars instead.\n\
2110If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111
2112static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115 if (PyTuple_GET_SIZE(args) == 0)
2116 return do_strip(self, RIGHTSTRIP); /* Common case */
2117 else
2118 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119}
2120
2121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002122PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123"S.lower() -> string\n\
2124\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002125Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126
Thomas Wouters477c8d52006-05-27 19:21:47 +00002127/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2128#ifndef _tolower
2129#define _tolower tolower
2130#endif
2131
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002133string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002135 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002136 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002137 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002139 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002140 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142
2143 s = PyString_AS_STRING(newobj);
2144
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002145 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002146
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002148 int c = Py_CHARMASK(s[i]);
2149 if (isupper(c))
2150 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002152
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002153 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154}
2155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002156PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157"S.upper() -> string\n\
2158\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002159Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160
Thomas Wouters477c8d52006-05-27 19:21:47 +00002161#ifndef _toupper
2162#define _toupper toupper
2163#endif
2164
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002166string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002168 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002169 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002170 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002172 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002173 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002175
2176 s = PyString_AS_STRING(newobj);
2177
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002178 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002179
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002181 int c = Py_CHARMASK(s[i]);
2182 if (islower(c))
2183 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002185
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002186 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187}
2188
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002189PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190"S.title() -> string\n\
2191\n\
2192Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002193characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194
2195static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002196string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197{
2198 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002199 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002201 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002203 newobj = PyString_FromStringAndSize(NULL, n);
2204 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002206 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207 for (i = 0; i < n; i++) {
2208 int c = Py_CHARMASK(*s++);
2209 if (islower(c)) {
2210 if (!previous_is_cased)
2211 c = toupper(c);
2212 previous_is_cased = 1;
2213 } else if (isupper(c)) {
2214 if (previous_is_cased)
2215 c = tolower(c);
2216 previous_is_cased = 1;
2217 } else
2218 previous_is_cased = 0;
2219 *s_new++ = c;
2220 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002221 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222}
2223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002224PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225"S.capitalize() -> string\n\
2226\n\
2227Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229
2230static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002231string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232{
2233 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002234 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002235 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002237 newobj = PyString_FromStringAndSize(NULL, n);
2238 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002240 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241 if (0 < n) {
2242 int c = Py_CHARMASK(*s++);
2243 if (islower(c))
2244 *s_new = toupper(c);
2245 else
2246 *s_new = c;
2247 s_new++;
2248 }
2249 for (i = 1; i < n; i++) {
2250 int c = Py_CHARMASK(*s++);
2251 if (isupper(c))
2252 *s_new = tolower(c);
2253 else
2254 *s_new = c;
2255 s_new++;
2256 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002257 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258}
2259
2260
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002261PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262"S.count(sub[, start[, end]]) -> int\n\
2263\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002264Return the number of non-overlapping occurrences of substring sub in\n\
2265string S[start:end]. Optional arguments start and end are interpreted\n\
2266as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
2268static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002269string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002271 PyObject *sub_obj;
2272 const char *str = PyString_AS_STRING(self), *sub;
2273 Py_ssize_t sub_len;
2274 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275
Thomas Wouters477c8d52006-05-27 19:21:47 +00002276 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2277 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002279
Thomas Wouters477c8d52006-05-27 19:21:47 +00002280 if (PyString_Check(sub_obj)) {
2281 sub = PyString_AS_STRING(sub_obj);
2282 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002284#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00002285 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002286 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002287 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002288 if (count == -1)
2289 return NULL;
2290 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002291 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002292 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002293#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00002294 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 return NULL;
2296
Thomas Wouters477c8d52006-05-27 19:21:47 +00002297 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002298
Thomas Wouters477c8d52006-05-27 19:21:47 +00002299 return PyInt_FromSsize_t(
2300 stringlib_count(str + start, end - start, sub, sub_len)
2301 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302}
2303
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002304PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305"S.swapcase() -> string\n\
2306\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002308converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309
2310static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002311string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312{
2313 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002314 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002315 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002317 newobj = PyString_FromStringAndSize(NULL, n);
2318 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002320 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 for (i = 0; i < n; i++) {
2322 int c = Py_CHARMASK(*s++);
2323 if (islower(c)) {
2324 *s_new = toupper(c);
2325 }
2326 else if (isupper(c)) {
2327 *s_new = tolower(c);
2328 }
2329 else
2330 *s_new = c;
2331 s_new++;
2332 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002333 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334}
2335
2336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002337PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338"S.translate(table [,deletechars]) -> string\n\
2339\n\
2340Return a copy of the string S, where all characters occurring\n\
2341in the optional argument deletechars are removed, and the\n\
2342remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002343translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344
2345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002346string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348 register char *input, *output;
2349 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002350 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002353 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354 PyObject *result;
2355 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002358 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361
2362 if (PyString_Check(tableobj)) {
2363 table1 = PyString_AS_STRING(tableobj);
2364 tablen = PyString_GET_SIZE(tableobj);
2365 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002366#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002368 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 parameter; instead a mapping to None will cause characters
2370 to be deleted. */
2371 if (delobj != NULL) {
2372 PyErr_SetString(PyExc_TypeError,
2373 "deletions are implemented differently for unicode");
2374 return NULL;
2375 }
2376 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2377 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002378#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381
Martin v. Löwis00b61272002-12-12 20:03:19 +00002382 if (tablen != 256) {
2383 PyErr_SetString(PyExc_ValueError,
2384 "translation table must be 256 characters long");
2385 return NULL;
2386 }
2387
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 if (delobj != NULL) {
2389 if (PyString_Check(delobj)) {
2390 del_table = PyString_AS_STRING(delobj);
2391 dellen = PyString_GET_SIZE(delobj);
2392 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002393#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002394 else if (PyUnicode_Check(delobj)) {
2395 PyErr_SetString(PyExc_TypeError,
2396 "deletions are implemented differently for unicode");
2397 return NULL;
2398 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002399#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2401 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 }
2403 else {
2404 del_table = NULL;
2405 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406 }
2407
2408 table = table1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002409 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 result = PyString_FromStringAndSize((char *)NULL, inlen);
2411 if (result == NULL)
2412 return NULL;
2413 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002414 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415
2416 if (dellen == 0) {
2417 /* If no deletions are required, use faster code */
2418 for (i = inlen; --i >= 0; ) {
2419 c = Py_CHARMASK(*input++);
2420 if (Py_CHARMASK((*output++ = table[c])) != c)
2421 changed = 1;
2422 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002423 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424 return result;
2425 Py_DECREF(result);
2426 Py_INCREF(input_obj);
2427 return input_obj;
2428 }
2429
2430 for (i = 0; i < 256; i++)
2431 trans_table[i] = Py_CHARMASK(table[i]);
2432
2433 for (i = 0; i < dellen; i++)
2434 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2435
2436 for (i = inlen; --i >= 0; ) {
2437 c = Py_CHARMASK(*input++);
2438 if (trans_table[c] != -1)
2439 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2440 continue;
2441 changed = 1;
2442 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002443 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 Py_DECREF(result);
2445 Py_INCREF(input_obj);
2446 return input_obj;
2447 }
2448 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002449 if (inlen > 0)
2450 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002451 return result;
2452}
2453
2454
/* Search directions understood by findstring() and countstring(): a value
   greater than zero scans forward, a value less than zero scans backward.
   REVERSE is parenthesized so it stays a single operand wherever it is
   expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL if there is none.  Every macro
   argument is parenthesized so that arbitrary expressions can be passed. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462
Thomas Wouters477c8d52006-05-27 19:21:47 +00002463/* String ops must return a string. */
2464/* If the object is subclass of string, create a copy */
2465Py_LOCAL(PyStringObject *)
2466return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002467{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002468 if (PyString_CheckExact(self)) {
2469 Py_INCREF(self);
2470 return self;
2471 }
2472 return (PyStringObject *)PyString_FromStringAndSize(
2473 PyString_AS_STRING(self),
2474 PyString_GET_SIZE(self));
2475}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476
Thomas Wouters477c8d52006-05-27 19:21:47 +00002477Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002478countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002479{
2480 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002481 const char *start=target;
2482 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483
Thomas Wouters477c8d52006-05-27 19:21:47 +00002484 while ( (start=findchar(start, end-start, c)) != NULL ) {
2485 count++;
2486 if (count >= maxcount)
2487 break;
2488 start += 1;
2489 }
2490 return count;
2491}
2492
2493Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002494findstring(const char *target, Py_ssize_t target_len,
2495 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002496 Py_ssize_t start,
2497 Py_ssize_t end,
2498 int direction)
2499{
2500 if (start < 0) {
2501 start += target_len;
2502 if (start < 0)
2503 start = 0;
2504 }
2505 if (end > target_len) {
2506 end = target_len;
2507 } else if (end < 0) {
2508 end += target_len;
2509 if (end < 0)
2510 end = 0;
2511 }
2512
2513 /* zero-length substrings always match at the first attempt */
2514 if (pattern_len == 0)
2515 return (direction > 0) ? start : end;
2516
2517 end -= pattern_len;
2518
2519 if (direction < 0) {
2520 for (; end >= start; end--)
2521 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2522 return end;
2523 } else {
2524 for (; start <= end; start++)
2525 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2526 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002527 }
2528 return -1;
2529}
2530
Thomas Wouters477c8d52006-05-27 19:21:47 +00002531Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002532countstring(const char *target, Py_ssize_t target_len,
2533 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002534 Py_ssize_t start,
2535 Py_ssize_t end,
2536 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002538 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539
Thomas Wouters477c8d52006-05-27 19:21:47 +00002540 if (start < 0) {
2541 start += target_len;
2542 if (start < 0)
2543 start = 0;
2544 }
2545 if (end > target_len) {
2546 end = target_len;
2547 } else if (end < 0) {
2548 end += target_len;
2549 if (end < 0)
2550 end = 0;
2551 }
2552
2553 /* zero-length substrings match everywhere */
2554 if (pattern_len == 0 || maxcount == 0) {
2555 if (target_len+1 < maxcount)
2556 return target_len+1;
2557 return maxcount;
2558 }
2559
2560 end -= pattern_len;
2561 if (direction < 0) {
2562 for (; (end >= start); end--)
2563 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2564 count++;
2565 if (--maxcount <= 0) break;
2566 end -= pattern_len-1;
2567 }
2568 } else {
2569 for (; (start <= end); start++)
2570 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2571 count++;
2572 if (--maxcount <= 0)
2573 break;
2574 start += pattern_len-1;
2575 }
2576 }
2577 return count;
2578}
2579
2580
2581/* Algorithms for different cases of string replacement */
2582
2583/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2584Py_LOCAL(PyStringObject *)
2585replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002586 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002587 Py_ssize_t maxcount)
2588{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002589 char *self_s, *result_s;
2590 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002591 Py_ssize_t count, i, product;
2592 PyStringObject *result;
2593
2594 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002595
Thomas Wouters477c8d52006-05-27 19:21:47 +00002596 /* 1 at the end plus 1 after every character */
2597 count = self_len+1;
2598 if (maxcount < count)
2599 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002600
Thomas Wouters477c8d52006-05-27 19:21:47 +00002601 /* Check for overflow */
2602 /* result_len = count * to_len + self_len; */
2603 product = count * to_len;
2604 if (product / to_len != count) {
2605 PyErr_SetString(PyExc_OverflowError,
2606 "replace string is too long");
2607 return NULL;
2608 }
2609 result_len = product + self_len;
2610 if (result_len < 0) {
2611 PyErr_SetString(PyExc_OverflowError,
2612 "replace string is too long");
2613 return NULL;
2614 }
2615
2616 if (! (result = (PyStringObject *)
2617 PyString_FromStringAndSize(NULL, result_len)) )
2618 return NULL;
2619
2620 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002621 result_s = PyString_AS_STRING(result);
2622
2623 /* TODO: special case single character, which doesn't need memcpy */
2624
2625 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002626 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002627 result_s += to_len;
2628 count -= 1;
2629
2630 for (i=0; i<count; i++) {
2631 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002632 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002633 result_s += to_len;
2634 }
2635
2636 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002637 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002638
2639 return result;
2640}
2641
2642/* Special case for deleting a single character */
2643/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2644Py_LOCAL(PyStringObject *)
2645replace_delete_single_character(PyStringObject *self,
2646 char from_c, Py_ssize_t maxcount)
2647{
2648 char *self_s, *result_s;
2649 char *start, *next, *end;
2650 Py_ssize_t self_len, result_len;
2651 Py_ssize_t count;
2652 PyStringObject *result;
2653
2654 self_len = PyString_GET_SIZE(self);
2655 self_s = PyString_AS_STRING(self);
2656
2657 count = countchar(self_s, self_len, from_c, maxcount);
2658 if (count == 0) {
2659 return return_self(self);
2660 }
2661
2662 result_len = self_len - count; /* from_len == 1 */
2663 assert(result_len>=0);
2664
2665 if ( (result = (PyStringObject *)
2666 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2667 return NULL;
2668 result_s = PyString_AS_STRING(result);
2669
2670 start = self_s;
2671 end = self_s + self_len;
2672 while (count-- > 0) {
2673 next = findchar(start, end-start, from_c);
2674 if (next == NULL)
2675 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002676 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002677 result_s += (next-start);
2678 start = next+1;
2679 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002680 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002681
Thomas Wouters477c8d52006-05-27 19:21:47 +00002682 return result;
2683}
2684
2685/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2686
2687Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002688replace_delete_substring(PyStringObject *self,
2689 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002690 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002691 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002692 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002693 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002694 Py_ssize_t count, offset;
2695 PyStringObject *result;
2696
2697 self_len = PyString_GET_SIZE(self);
2698 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002699
2700 count = countstring(self_s, self_len,
2701 from_s, from_len,
2702 0, self_len, 1,
2703 maxcount);
2704
2705 if (count == 0) {
2706 /* no matches */
2707 return return_self(self);
2708 }
2709
2710 result_len = self_len - (count * from_len);
2711 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002712
Thomas Wouters477c8d52006-05-27 19:21:47 +00002713 if ( (result = (PyStringObject *)
2714 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2715 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002716
Thomas Wouters477c8d52006-05-27 19:21:47 +00002717 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002718
Thomas Wouters477c8d52006-05-27 19:21:47 +00002719 start = self_s;
2720 end = self_s + self_len;
2721 while (count-- > 0) {
2722 offset = findstring(start, end-start,
2723 from_s, from_len,
2724 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002725 if (offset == -1)
2726 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002727 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002728
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002729 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002730
Thomas Wouters477c8d52006-05-27 19:21:47 +00002731 result_s += (next-start);
2732 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002733 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002734 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002735 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002736}
2737
Thomas Wouters477c8d52006-05-27 19:21:47 +00002738/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2739Py_LOCAL(PyStringObject *)
2740replace_single_character_in_place(PyStringObject *self,
2741 char from_c, char to_c,
2742 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002743{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002744 char *self_s, *result_s, *start, *end, *next;
2745 Py_ssize_t self_len;
2746 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002747
Thomas Wouters477c8d52006-05-27 19:21:47 +00002748 /* The result string will be the same size */
2749 self_s = PyString_AS_STRING(self);
2750 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002751
Thomas Wouters477c8d52006-05-27 19:21:47 +00002752 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002753
Thomas Wouters477c8d52006-05-27 19:21:47 +00002754 if (next == NULL) {
2755 /* No matches; return the original string */
2756 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002757 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002758
Thomas Wouters477c8d52006-05-27 19:21:47 +00002759 /* Need to make a new string */
2760 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2761 if (result == NULL)
2762 return NULL;
2763 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002764 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002765
Thomas Wouters477c8d52006-05-27 19:21:47 +00002766 /* change everything in-place, starting with this one */
2767 start = result_s + (next-self_s);
2768 *start = to_c;
2769 start++;
2770 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002771
Thomas Wouters477c8d52006-05-27 19:21:47 +00002772 while (--maxcount > 0) {
2773 next = findchar(start, end-start, from_c);
2774 if (next == NULL)
2775 break;
2776 *next = to_c;
2777 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002778 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002779
Thomas Wouters477c8d52006-05-27 19:21:47 +00002780 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002781}
2782
Thomas Wouters477c8d52006-05-27 19:21:47 +00002783/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2784Py_LOCAL(PyStringObject *)
2785replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002786 const char *from_s, Py_ssize_t from_len,
2787 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002788 Py_ssize_t maxcount)
2789{
2790 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002791 char *self_s;
2792 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002793 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002794
Thomas Wouters477c8d52006-05-27 19:21:47 +00002795 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002796
Thomas Wouters477c8d52006-05-27 19:21:47 +00002797 self_s = PyString_AS_STRING(self);
2798 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002799
Thomas Wouters477c8d52006-05-27 19:21:47 +00002800 offset = findstring(self_s, self_len,
2801 from_s, from_len,
2802 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002803 if (offset == -1) {
2804 /* No matches; return the original string */
2805 return return_self(self);
2806 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002807
Thomas Wouters477c8d52006-05-27 19:21:47 +00002808 /* Need to make a new string */
2809 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2810 if (result == NULL)
2811 return NULL;
2812 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002813 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002814
Thomas Wouters477c8d52006-05-27 19:21:47 +00002815 /* change everything in-place, starting with this one */
2816 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002817 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002818 start += from_len;
2819 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002820
Thomas Wouters477c8d52006-05-27 19:21:47 +00002821 while ( --maxcount > 0) {
2822 offset = findstring(start, end-start,
2823 from_s, from_len,
2824 0, end-start, FORWARD);
2825 if (offset==-1)
2826 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002827 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002828 start += offset+from_len;
2829 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002830
Thomas Wouters477c8d52006-05-27 19:21:47 +00002831 return result;
2832}
2833
2834/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2835Py_LOCAL(PyStringObject *)
2836replace_single_character(PyStringObject *self,
2837 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002838 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002839 Py_ssize_t maxcount)
2840{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002841 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002842 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002843 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002844 Py_ssize_t count, product;
2845 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002846
Thomas Wouters477c8d52006-05-27 19:21:47 +00002847 self_s = PyString_AS_STRING(self);
2848 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002849
Thomas Wouters477c8d52006-05-27 19:21:47 +00002850 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002851 if (count == 0) {
2852 /* no matches, return unchanged */
2853 return return_self(self);
2854 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002855
Thomas Wouters477c8d52006-05-27 19:21:47 +00002856 /* use the difference between current and new, hence the "-1" */
2857 /* result_len = self_len + count * (to_len-1) */
2858 product = count * (to_len-1);
2859 if (product / (to_len-1) != count) {
2860 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2861 return NULL;
2862 }
2863 result_len = self_len + product;
2864 if (result_len < 0) {
2865 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2866 return NULL;
2867 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002868
Thomas Wouters477c8d52006-05-27 19:21:47 +00002869 if ( (result = (PyStringObject *)
2870 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2871 return NULL;
2872 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002873
Thomas Wouters477c8d52006-05-27 19:21:47 +00002874 start = self_s;
2875 end = self_s + self_len;
2876 while (count-- > 0) {
2877 next = findchar(start, end-start, from_c);
2878 if (next == NULL)
2879 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002880
Thomas Wouters477c8d52006-05-27 19:21:47 +00002881 if (next == start) {
2882 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002883 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002884 result_s += to_len;
2885 start += 1;
2886 } else {
2887 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002888 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002889 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002890 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002891 result_s += to_len;
2892 start = next+1;
2893 }
2894 }
2895 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002896 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002897
Thomas Wouters477c8d52006-05-27 19:21:47 +00002898 return result;
2899}
2900
2901/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2902Py_LOCAL(PyStringObject *)
2903replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002904 const char *from_s, Py_ssize_t from_len,
2905 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002906 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002907 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002908 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002909 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002910 Py_ssize_t count, offset, product;
2911 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002912
Thomas Wouters477c8d52006-05-27 19:21:47 +00002913 self_s = PyString_AS_STRING(self);
2914 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002915
Thomas Wouters477c8d52006-05-27 19:21:47 +00002916 count = countstring(self_s, self_len,
2917 from_s, from_len,
2918 0, self_len, FORWARD, maxcount);
2919 if (count == 0) {
2920 /* no matches, return unchanged */
2921 return return_self(self);
2922 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002923
Thomas Wouters477c8d52006-05-27 19:21:47 +00002924 /* Check for overflow */
2925 /* result_len = self_len + count * (to_len-from_len) */
2926 product = count * (to_len-from_len);
2927 if (product / (to_len-from_len) != count) {
2928 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2929 return NULL;
2930 }
2931 result_len = self_len + product;
2932 if (result_len < 0) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002936
Thomas Wouters477c8d52006-05-27 19:21:47 +00002937 if ( (result = (PyStringObject *)
2938 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2939 return NULL;
2940 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002941
Thomas Wouters477c8d52006-05-27 19:21:47 +00002942 start = self_s;
2943 end = self_s + self_len;
2944 while (count-- > 0) {
2945 offset = findstring(start, end-start,
2946 from_s, from_len,
2947 0, end-start, FORWARD);
2948 if (offset == -1)
2949 break;
2950 next = start+offset;
2951 if (next == start) {
2952 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002953 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002954 result_s += to_len;
2955 start += from_len;
2956 } else {
2957 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002958 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002959 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002960 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002961 result_s += to_len;
2962 start = next+from_len;
2963 }
2964 }
2965 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002966 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002967
Thomas Wouters477c8d52006-05-27 19:21:47 +00002968 return result;
2969}
2970
2971
2972Py_LOCAL(PyStringObject *)
2973replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002974 const char *from_s, Py_ssize_t from_len,
2975 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002976 Py_ssize_t maxcount)
2977{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002978 if (maxcount < 0) {
2979 maxcount = PY_SSIZE_T_MAX;
2980 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2981 /* nothing to do; return the original string */
2982 return return_self(self);
2983 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002984
Thomas Wouters477c8d52006-05-27 19:21:47 +00002985 if (maxcount == 0 ||
2986 (from_len == 0 && to_len == 0)) {
2987 /* nothing to do; return the original string */
2988 return return_self(self);
2989 }
2990
2991 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002992
Thomas Wouters477c8d52006-05-27 19:21:47 +00002993 if (from_len == 0) {
2994 /* insert the 'to' string everywhere. */
2995 /* >>> "Python".replace("", ".") */
2996 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002997 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002998 }
2999
3000 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3001 /* point for an empty self string to generate a non-empty string */
3002 /* Special case so the remaining code always gets a non-empty string */
3003 if (PyString_GET_SIZE(self) == 0) {
3004 return return_self(self);
3005 }
3006
3007 if (to_len == 0) {
3008 /* delete all occurances of 'from' string */
3009 if (from_len == 1) {
3010 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003011 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003012 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003013 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003014 }
3015 }
3016
3017 /* Handle special case where both strings have the same length */
3018
3019 if (from_len == to_len) {
3020 if (from_len == 1) {
3021 return replace_single_character_in_place(
3022 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003023 from_s[0],
3024 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00003025 maxcount);
3026 } else {
3027 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003028 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003029 }
3030 }
3031
3032 /* Otherwise use the more generic algorithms */
3033 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003034 return replace_single_character(self, from_s[0],
3035 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003036 } else {
3037 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003038 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003039 }
3040}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003041
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003042PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003043"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044\n\
3045Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003046old replaced by new. If the optional argument count is\n\
3047given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048
3049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003050string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003051{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003052 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003053 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003054 const char *from_s, *to_s;
3055 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056
Thomas Wouters477c8d52006-05-27 19:21:47 +00003057 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003058 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059
Thomas Wouters477c8d52006-05-27 19:21:47 +00003060 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003061 from_s = PyString_AS_STRING(from);
3062 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003064#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003065 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003066 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003067 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003068#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003069 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070 return NULL;
3071
Thomas Wouters477c8d52006-05-27 19:21:47 +00003072 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003073 to_s = PyString_AS_STRING(to);
3074 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003076#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003077 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003078 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003079 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003080#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003081 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 return NULL;
3083
Thomas Wouters477c8d52006-05-27 19:21:47 +00003084 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003085 from_s, from_len,
3086 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087}
3088
Thomas Wouters477c8d52006-05-27 19:21:47 +00003089/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003091/* Matches the end (direction >= 0) or start (direction < 0) of self
3092 * against substr, using the start and end arguments. Returns
3093 * -1 on error, 0 if not found and 1 if found.
3094 */
3095Py_LOCAL(int)
3096_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3097 Py_ssize_t end, int direction)
3098{
3099 Py_ssize_t len = PyString_GET_SIZE(self);
3100 Py_ssize_t slen;
3101 const char* sub;
3102 const char* str;
3103
3104 if (PyString_Check(substr)) {
3105 sub = PyString_AS_STRING(substr);
3106 slen = PyString_GET_SIZE(substr);
3107 }
3108#ifdef Py_USING_UNICODE
3109 else if (PyUnicode_Check(substr))
3110 return PyUnicode_Tailmatch((PyObject *)self,
3111 substr, start, end, direction);
3112#endif
3113 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3114 return -1;
3115 str = PyString_AS_STRING(self);
3116
3117 string_adjust_indices(&start, &end, len);
3118
3119 if (direction < 0) {
3120 /* startswith */
3121 if (start+slen > len)
3122 return 0;
3123 } else {
3124 /* endswith */
3125 if (end-start < slen || start > len)
3126 return 0;
3127
3128 if (end-slen > start)
3129 start = end - slen;
3130 }
3131 if (end-start >= slen)
3132 return ! memcmp(str+start, sub, slen);
3133 return 0;
3134}
3135
3136
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003137PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003138"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003140Return True if S starts with the specified prefix, False otherwise.\n\
3141With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003142With optional end, stop comparing S at that position.\n\
3143prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144
3145static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003146string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003148 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003149 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003151 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152
Guido van Rossumc6821402000-05-08 14:08:05 +00003153 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3154 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003156 if (PyTuple_Check(subobj)) {
3157 Py_ssize_t i;
3158 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3159 result = _string_tailmatch(self,
3160 PyTuple_GET_ITEM(subobj, i),
3161 start, end, -1);
3162 if (result == -1)
3163 return NULL;
3164 else if (result) {
3165 Py_RETURN_TRUE;
3166 }
3167 }
3168 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003170 result = _string_tailmatch(self, subobj, start, end, -1);
3171 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003173 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003174 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003175}
3176
3177
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003178PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003179"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003180\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003181Return True if S ends with the specified suffix, False otherwise.\n\
3182With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003183With optional end, stop comparing S at that position.\n\
3184suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003185
3186static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003187string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003188{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003189 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003190 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003192 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003193
Guido van Rossumc6821402000-05-08 14:08:05 +00003194 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3195 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003197 if (PyTuple_Check(subobj)) {
3198 Py_ssize_t i;
3199 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3200 result = _string_tailmatch(self,
3201 PyTuple_GET_ITEM(subobj, i),
3202 start, end, +1);
3203 if (result == -1)
3204 return NULL;
3205 else if (result) {
3206 Py_RETURN_TRUE;
3207 }
3208 }
3209 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003211 result = _string_tailmatch(self, subobj, start, end, +1);
3212 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003214 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003215 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216}
3217
3218
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003219PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003220"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003221\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003222Encodes S using the codec registered for encoding. encoding defaults\n\
3223to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003224handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003225a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3226'xmlcharrefreplace' as well as any other name registered with\n\
3227codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003228
3229static PyObject *
3230string_encode(PyStringObject *self, PyObject *args)
3231{
3232 char *encoding = NULL;
3233 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003234 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003235
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003236 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3237 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003238 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003239 if (v == NULL)
3240 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003241 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3242 PyErr_Format(PyExc_TypeError,
3243 "encoder did not return a string/unicode object "
3244 "(type=%.400s)",
3245 v->ob_type->tp_name);
3246 Py_DECREF(v);
3247 return NULL;
3248 }
3249 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003250
3251 onError:
3252 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003253}
3254
3255
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003256PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003257"S.decode([encoding[,errors]]) -> object\n\
3258\n\
3259Decodes S using the codec registered for encoding. encoding defaults\n\
3260to the default encoding. errors may be given to set a different error\n\
3261handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003262a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3263as well as any other name registerd with codecs.register_error that is\n\
3264able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003265
3266static PyObject *
3267string_decode(PyStringObject *self, PyObject *args)
3268{
3269 char *encoding = NULL;
3270 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003271 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003272
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003273 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3274 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003275 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003276 if (v == NULL)
3277 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003278 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3279 PyErr_Format(PyExc_TypeError,
3280 "decoder did not return a string/unicode object "
3281 "(type=%.400s)",
3282 v->ob_type->tp_name);
3283 Py_DECREF(v);
3284 return NULL;
3285 }
3286 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003287
3288 onError:
3289 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003290}
3291
3292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003293PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003294"S.expandtabs([tabsize]) -> string\n\
3295\n\
3296Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003297If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003298
3299static PyObject*
3300string_expandtabs(PyStringObject *self, PyObject *args)
3301{
3302 const char *e, *p;
3303 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003304 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003305 PyObject *u;
3306 int tabsize = 8;
3307
3308 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3309 return NULL;
3310
Thomas Wouters7e474022000-07-16 12:04:32 +00003311 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003312 i = j = 0;
3313 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3314 for (p = PyString_AS_STRING(self); p < e; p++)
3315 if (*p == '\t') {
3316 if (tabsize > 0)
3317 j += tabsize - (j % tabsize);
3318 }
3319 else {
3320 j++;
3321 if (*p == '\n' || *p == '\r') {
3322 i += j;
3323 j = 0;
3324 }
3325 }
3326
3327 /* Second pass: create output string and fill it */
3328 u = PyString_FromStringAndSize(NULL, i + j);
3329 if (!u)
3330 return NULL;
3331
3332 j = 0;
3333 q = PyString_AS_STRING(u);
3334
3335 for (p = PyString_AS_STRING(self); p < e; p++)
3336 if (*p == '\t') {
3337 if (tabsize > 0) {
3338 i = tabsize - (j % tabsize);
3339 j += i;
3340 while (i--)
3341 *q++ = ' ';
3342 }
3343 }
3344 else {
3345 j++;
3346 *q++ = *p;
3347 if (*p == '\n' || *p == '\r')
3348 j = 0;
3349 }
3350
3351 return u;
3352}
3353
Thomas Wouters477c8d52006-05-27 19:21:47 +00003354Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003355pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003356{
3357 PyObject *u;
3358
3359 if (left < 0)
3360 left = 0;
3361 if (right < 0)
3362 right = 0;
3363
Tim Peters8fa5dd02001-09-12 02:18:30 +00003364 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003365 Py_INCREF(self);
3366 return (PyObject *)self;
3367 }
3368
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003369 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003370 left + PyString_GET_SIZE(self) + right);
3371 if (u) {
3372 if (left)
3373 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003374 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003375 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003376 PyString_GET_SIZE(self));
3377 if (right)
3378 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3379 fill, right);
3380 }
3381
3382 return u;
3383}
3384
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003385PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003386"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003387"\n"
3388"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003389"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003390
3391static PyObject *
3392string_ljust(PyStringObject *self, PyObject *args)
3393{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003394 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003395 char fillchar = ' ';
3396
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003397 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398 return NULL;
3399
Tim Peters8fa5dd02001-09-12 02:18:30 +00003400 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003401 Py_INCREF(self);
3402 return (PyObject*) self;
3403 }
3404
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003405 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406}
3407
3408
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003409PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003410"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003411"\n"
3412"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003413"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414
3415static PyObject *
3416string_rjust(PyStringObject *self, PyObject *args)
3417{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003418 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003419 char fillchar = ' ';
3420
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003421 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422 return NULL;
3423
Tim Peters8fa5dd02001-09-12 02:18:30 +00003424 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425 Py_INCREF(self);
3426 return (PyObject*) self;
3427 }
3428
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430}
3431
3432
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003433PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003434"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003435"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003436"Return S centered in a string of length width. Padding is\n"
3437"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438
3439static PyObject *
3440string_center(PyStringObject *self, PyObject *args)
3441{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003442 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003443 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003444 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003446 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003447 return NULL;
3448
Tim Peters8fa5dd02001-09-12 02:18:30 +00003449 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003450 Py_INCREF(self);
3451 return (PyObject*) self;
3452 }
3453
3454 marg = width - PyString_GET_SIZE(self);
3455 left = marg / 2 + (marg & width & 1);
3456
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003457 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458}
3459
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003460PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003461"S.zfill(width) -> string\n"
3462"\n"
3463"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003464"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003465
3466static PyObject *
3467string_zfill(PyStringObject *self, PyObject *args)
3468{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003469 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003470 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003471 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003472 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003473
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003474 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003475 return NULL;
3476
3477 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003478 if (PyString_CheckExact(self)) {
3479 Py_INCREF(self);
3480 return (PyObject*) self;
3481 }
3482 else
3483 return PyString_FromStringAndSize(
3484 PyString_AS_STRING(self),
3485 PyString_GET_SIZE(self)
3486 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003487 }
3488
3489 fill = width - PyString_GET_SIZE(self);
3490
3491 s = pad(self, fill, 0, '0');
3492
3493 if (s == NULL)
3494 return NULL;
3495
3496 p = PyString_AS_STRING(s);
3497 if (p[fill] == '+' || p[fill] == '-') {
3498 /* move sign to beginning of string */
3499 p[0] = p[fill];
3500 p[fill] = '0';
3501 }
3502
3503 return (PyObject*) s;
3504}
3505
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003506PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003507"S.isspace() -> bool\n\
3508\n\
3509Return True if all characters in S are whitespace\n\
3510and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003511
3512static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003513string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003514{
Fred Drakeba096332000-07-09 07:04:36 +00003515 register const unsigned char *p
3516 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003517 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003518
Guido van Rossum4c08d552000-03-10 22:55:18 +00003519 /* Shortcut for single character strings */
3520 if (PyString_GET_SIZE(self) == 1 &&
3521 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003522 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003523
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003524 /* Special case for empty strings */
3525 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003526 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003527
Guido van Rossum4c08d552000-03-10 22:55:18 +00003528 e = p + PyString_GET_SIZE(self);
3529 for (; p < e; p++) {
3530 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003531 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003532 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003533 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534}
3535
3536
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003537PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003539\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003540Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003541and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003542
3543static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003544string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003545{
Fred Drakeba096332000-07-09 07:04:36 +00003546 register const unsigned char *p
3547 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003548 register const unsigned char *e;
3549
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550 /* Shortcut for single character strings */
3551 if (PyString_GET_SIZE(self) == 1 &&
3552 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003553 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003554
3555 /* Special case for empty strings */
3556 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003557 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558
3559 e = p + PyString_GET_SIZE(self);
3560 for (; p < e; p++) {
3561 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003562 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003563 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003564 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565}
3566
3567
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003568PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003571Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003572and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573
3574static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003575string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576{
Fred Drakeba096332000-07-09 07:04:36 +00003577 register const unsigned char *p
3578 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579 register const unsigned char *e;
3580
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581 /* Shortcut for single character strings */
3582 if (PyString_GET_SIZE(self) == 1 &&
3583 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003584 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003585
3586 /* Special case for empty strings */
3587 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589
3590 e = p + PyString_GET_SIZE(self);
3591 for (; p < e; p++) {
3592 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003595 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003596}
3597
3598
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003599PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003601\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003602Return True if all characters in S are digits\n\
3603and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003604
3605static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003606string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607{
Fred Drakeba096332000-07-09 07:04:36 +00003608 register const unsigned char *p
3609 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003610 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003611
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612 /* Shortcut for single character strings */
3613 if (PyString_GET_SIZE(self) == 1 &&
3614 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003615 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003616
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003617 /* Special case for empty strings */
3618 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003620
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621 e = p + PyString_GET_SIZE(self);
3622 for (; p < e; p++) {
3623 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003626 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627}
3628
3629
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003630PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003634at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635
3636static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003637string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638{
Fred Drakeba096332000-07-09 07:04:36 +00003639 register const unsigned char *p
3640 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003641 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642 int cased;
3643
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644 /* Shortcut for single character strings */
3645 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003646 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003648 /* Special case for empty strings */
3649 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003651
Guido van Rossum4c08d552000-03-10 22:55:18 +00003652 e = p + PyString_GET_SIZE(self);
3653 cased = 0;
3654 for (; p < e; p++) {
3655 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657 else if (!cased && islower(*p))
3658 cased = 1;
3659 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661}
3662
3663
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003664PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003667Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003668at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669
3670static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003671string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672{
Fred Drakeba096332000-07-09 07:04:36 +00003673 register const unsigned char *p
3674 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003675 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 int cased;
3677
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678 /* Shortcut for single character strings */
3679 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003682 /* Special case for empty strings */
3683 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003685
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686 e = p + PyString_GET_SIZE(self);
3687 cased = 0;
3688 for (; p < e; p++) {
3689 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003690 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691 else if (!cased && isupper(*p))
3692 cased = 1;
3693 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695}
3696
3697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003698PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003701Return True if S is a titlecased string and there is at least one\n\
3702character in S, i.e. uppercase characters may only follow uncased\n\
3703characters and lowercase characters only cased ones. Return False\n\
3704otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705
3706static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003707string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708{
Fred Drakeba096332000-07-09 07:04:36 +00003709 register const unsigned char *p
3710 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003711 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 int cased, previous_is_cased;
3713
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714 /* Shortcut for single character strings */
3715 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003718 /* Special case for empty strings */
3719 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003721
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722 e = p + PyString_GET_SIZE(self);
3723 cased = 0;
3724 previous_is_cased = 0;
3725 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003726 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003727
3728 if (isupper(ch)) {
3729 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003730 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 previous_is_cased = 1;
3732 cased = 1;
3733 }
3734 else if (islower(ch)) {
3735 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737 previous_is_cased = 1;
3738 cased = 1;
3739 }
3740 else
3741 previous_is_cased = 0;
3742 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003743 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003744}
3745
3746
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003747PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003748"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003749\n\
3750Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003751Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003752is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754static PyObject*
3755string_splitlines(PyStringObject *self, PyObject *args)
3756{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003757 register Py_ssize_t i;
3758 register Py_ssize_t j;
3759 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003760 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761 PyObject *list;
3762 PyObject *str;
3763 char *data;
3764
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003765 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766 return NULL;
3767
3768 data = PyString_AS_STRING(self);
3769 len = PyString_GET_SIZE(self);
3770
Thomas Wouters477c8d52006-05-27 19:21:47 +00003771 /* This does not use the preallocated list because splitlines is
3772 usually run with hundreds of newlines. The overhead of
3773 switching between PyList_SET_ITEM and append causes about a
3774 2-3% slowdown for that common case. A smarter implementation
3775 could move the if check out, so the SET_ITEMs are done first
3776 and the appends only done when the prealloc buffer is full.
3777 That's too much work for little gain.*/
3778
Guido van Rossum4c08d552000-03-10 22:55:18 +00003779 list = PyList_New(0);
3780 if (!list)
3781 goto onError;
3782
3783 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003784 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003785
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786 /* Find a line and append it */
3787 while (i < len && data[i] != '\n' && data[i] != '\r')
3788 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003789
3790 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003791 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792 if (i < len) {
3793 if (data[i] == '\r' && i + 1 < len &&
3794 data[i+1] == '\n')
3795 i += 2;
3796 else
3797 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003798 if (keepends)
3799 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003801 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003802 j = i;
3803 }
3804 if (j < len) {
3805 SPLIT_APPEND(data, j, len);
3806 }
3807
3808 return list;
3809
3810 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003811 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003812 return NULL;
3813}
3814
3815#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003816#undef SPLIT_ADD
3817#undef MAX_PREALLOC
3818#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003819
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003820static PyObject *
3821string_getnewargs(PyStringObject *v)
3822{
3823 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3824}
3825
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003826
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003827static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003828string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003829 /* Counterparts of the obsolete stropmodule functions; except
3830 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003831 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3832 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003833 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003834 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3835 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003836 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3837 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3838 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3839 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3840 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3841 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3842 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003843 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3844 capitalize__doc__},
3845 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3846 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3847 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003848 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003849 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3850 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3851 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3852 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3853 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3854 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3855 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003856 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3857 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003858 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3859 startswith__doc__},
3860 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3861 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3862 swapcase__doc__},
3863 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3864 translate__doc__},
3865 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3866 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3867 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3868 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3869 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3870 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3871 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3872 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3873 expandtabs__doc__},
3874 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3875 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003876 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003877 {NULL, NULL} /* sentinel */
3878};
3879
Jeremy Hylton938ace62002-07-17 16:30:39 +00003880static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003881str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3882
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003883static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003884string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003885{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003886 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003887 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003888
Guido van Rossumae960af2001-08-30 03:11:59 +00003889 if (type != &PyString_Type)
3890 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003891 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3892 return NULL;
3893 if (x == NULL)
3894 return PyString_FromString("");
3895 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003896}
3897
Guido van Rossumae960af2001-08-30 03:11:59 +00003898static PyObject *
3899str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3900{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003901 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003902 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003903
3904 assert(PyType_IsSubtype(type, &PyString_Type));
3905 tmp = string_new(&PyString_Type, args, kwds);
3906 if (tmp == NULL)
3907 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003908 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003909 n = PyString_GET_SIZE(tmp);
3910 pnew = type->tp_alloc(type, n);
3911 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003912 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003913 ((PyStringObject *)pnew)->ob_shash =
3914 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003915 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003916 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003917 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003918 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003919}
3920
Guido van Rossumcacfc072002-05-24 19:01:59 +00003921static PyObject *
3922basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3923{
3924 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003925 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003926 return NULL;
3927}
3928
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003929static PyObject *
3930string_mod(PyObject *v, PyObject *w)
3931{
3932 if (!PyString_Check(v)) {
3933 Py_INCREF(Py_NotImplemented);
3934 return Py_NotImplemented;
3935 }
3936 return PyString_Format(v, w);
3937}
3938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003939PyDoc_STRVAR(basestring_doc,
3940"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003941
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003942static PyNumberMethods string_as_number = {
3943 0, /*nb_add*/
3944 0, /*nb_subtract*/
3945 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003946 string_mod, /*nb_remainder*/
3947};
3948
3949
Guido van Rossumcacfc072002-05-24 19:01:59 +00003950PyTypeObject PyBaseString_Type = {
3951 PyObject_HEAD_INIT(&PyType_Type)
3952 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003953 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003954 0,
3955 0,
3956 0, /* tp_dealloc */
3957 0, /* tp_print */
3958 0, /* tp_getattr */
3959 0, /* tp_setattr */
3960 0, /* tp_compare */
3961 0, /* tp_repr */
3962 0, /* tp_as_number */
3963 0, /* tp_as_sequence */
3964 0, /* tp_as_mapping */
3965 0, /* tp_hash */
3966 0, /* tp_call */
3967 0, /* tp_str */
3968 0, /* tp_getattro */
3969 0, /* tp_setattro */
3970 0, /* tp_as_buffer */
3971 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3972 basestring_doc, /* tp_doc */
3973 0, /* tp_traverse */
3974 0, /* tp_clear */
3975 0, /* tp_richcompare */
3976 0, /* tp_weaklistoffset */
3977 0, /* tp_iter */
3978 0, /* tp_iternext */
3979 0, /* tp_methods */
3980 0, /* tp_members */
3981 0, /* tp_getset */
3982 &PyBaseObject_Type, /* tp_base */
3983 0, /* tp_dict */
3984 0, /* tp_descr_get */
3985 0, /* tp_descr_set */
3986 0, /* tp_dictoffset */
3987 0, /* tp_init */
3988 0, /* tp_alloc */
3989 basestring_new, /* tp_new */
3990 0, /* tp_free */
3991};
3992
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003993PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003994"str(object) -> string\n\
3995\n\
3996Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003997If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003998
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003999static PyObject *str_iter(PyObject *seq);
4000
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004001PyTypeObject PyString_Type = {
4002 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004003 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004004 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004005 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004006 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004007 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004008 (printfunc)string_print, /* tp_print */
4009 0, /* tp_getattr */
4010 0, /* tp_setattr */
4011 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004012 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004013 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004014 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004015 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004016 (hashfunc)string_hash, /* tp_hash */
4017 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004018 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004019 PyObject_GenericGetAttr, /* tp_getattro */
4020 0, /* tp_setattro */
4021 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00004022 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
4023 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004024 string_doc, /* tp_doc */
4025 0, /* tp_traverse */
4026 0, /* tp_clear */
4027 (richcmpfunc)string_richcompare, /* tp_richcompare */
4028 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004029 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004030 0, /* tp_iternext */
4031 string_methods, /* tp_methods */
4032 0, /* tp_members */
4033 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004034 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004035 0, /* tp_dict */
4036 0, /* tp_descr_get */
4037 0, /* tp_descr_set */
4038 0, /* tp_dictoffset */
4039 0, /* tp_init */
4040 0, /* tp_alloc */
4041 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004042 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004043};
4044
4045void
Fred Drakeba096332000-07-09 07:04:36 +00004046PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004047{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004048 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004049 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004050 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004051 if (w == NULL || !PyString_Check(*pv)) {
4052 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004053 *pv = NULL;
4054 return;
4055 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004056 v = string_concat((PyStringObject *) *pv, w);
4057 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004058 *pv = v;
4059}
4060
Guido van Rossum013142a1994-08-30 08:19:36 +00004061void
Fred Drakeba096332000-07-09 07:04:36 +00004062PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004063{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004064 PyString_Concat(pv, w);
4065 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004066}
4067
4068
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004069/* The following function breaks the notion that strings are immutable:
4070 it changes the size of a string. We get away with this only if there
4071 is only one module referencing the object. You can also think of it
4072 as creating a new string object and destroying the old one, only
4073 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004074 already be known to some other part of the code...
4075 Note that if there's not enough memory to resize the string, the original
4076 string object at *pv is deallocated, *pv is set to NULL, an "out of
4077 memory" exception is set, and -1 is returned. Else (on success) 0 is
4078 returned, and the value in *pv may or may not be the same as on input.
4079 As always, an extra byte is allocated for a trailing \0 byte (newsize
4080 does *not* include that), and a trailing \0 byte is stored.
4081*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004082
4083int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004084_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004085{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004086 register PyObject *v;
4087 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004088 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004089 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4090 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004091 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004092 Py_DECREF(v);
4093 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004094 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004095 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004096 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004097 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004098 _Py_ForgetReference(v);
4099 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004100 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004101 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004102 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004103 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004104 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004105 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004106 _Py_NewReference(*pv);
4107 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004108 sv->ob_size = newsize;
4109 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004110 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004111 return 0;
4112}
Guido van Rossume5372401993-03-16 12:15:04 +00004113
4114/* Helpers for formatstring */
4115
Thomas Wouters477c8d52006-05-27 19:21:47 +00004116Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004117getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004118{
Thomas Wouters977485d2006-02-16 15:59:12 +00004119 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004120 if (argidx < arglen) {
4121 (*p_argidx)++;
4122 if (arglen < 0)
4123 return args;
4124 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004125 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004126 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004127 PyErr_SetString(PyExc_TypeError,
4128 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004129 return NULL;
4130}
4131
/* Format flag bits, one per flag character of a format specifier. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
4144
Thomas Wouters477c8d52006-05-27 19:21:47 +00004145Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004146formatfloat(char *buf, size_t buflen, int flags,
4147 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004148{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004149 /* fmt = '%#.' + `prec` + `type`
4150 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004151 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004152 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004153 x = PyFloat_AsDouble(v);
4154 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004155 PyErr_Format(PyExc_TypeError, "float argument required, "
4156 "not %.200s", v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004157 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004158 }
Guido van Rossume5372401993-03-16 12:15:04 +00004159 if (prec < 0)
4160 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004161 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4162 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004163 /* Worst case length calc to ensure no buffer overrun:
4164
4165 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004166 fmt = %#.<prec>g
4167 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004168 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004169 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004170
4171 'f' formats:
4172 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4173 len = 1 + 50 + 1 + prec = 52 + prec
4174
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004175 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004176 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004177
4178 */
4179 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4180 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004181 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004182 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004183 return -1;
4184 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004185 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4186 (flags&F_ALT) ? "#" : "",
4187 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004188 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004189 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004190}
4191
Tim Peters38fd5b62000-09-21 05:43:11 +00004192/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4193 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4194 * Python's regular ints.
4195 * Return value: a new PyString*, or NULL if error.
4196 * . *pbuf is set to point into it,
4197 * *plen set to the # of chars following that.
4198 * Caller must decref it when done using pbuf.
4199 * The string starting at *pbuf is of the form
4200 * "-"? ("0x" | "0X")? digit+
4201 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004202 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004203 * There will be at least prec digits, zero-filled on the left if
4204 * necessary to get that many.
4205 * val object to be converted
4206 * flags bitmask of format flags; only F_ALT is looked at
4207 * prec minimum number of digits; 0-fill on left if needed
4208 * type a character in [duoxX]; u acts the same as d
4209 *
4210 * CAUTION: o, x and X conversions on regular ints can never
4211 * produce a '-' sign, but can for Python's unbounded ints.
4212 */
4213PyObject*
4214_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4215 char **pbuf, int *plen)
4216{
4217 PyObject *result = NULL;
4218 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004219 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004220 int sign; /* 1 if '-', else 0 */
4221 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004222 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004223 int numdigits; /* len == numnondigits + numdigits */
4224 int numnondigits = 0;
4225
Guido van Rossumddefaf32007-01-14 03:31:43 +00004226 /* Avoid exceeding SSIZE_T_MAX */
4227 if (prec > PY_SSIZE_T_MAX-3) {
4228 PyErr_SetString(PyExc_OverflowError,
4229 "precision too large");
4230 return NULL;
4231 }
4232
4233
Tim Peters38fd5b62000-09-21 05:43:11 +00004234 switch (type) {
4235 case 'd':
4236 case 'u':
4237 result = val->ob_type->tp_str(val);
4238 break;
4239 case 'o':
4240 result = val->ob_type->tp_as_number->nb_oct(val);
4241 break;
4242 case 'x':
4243 case 'X':
4244 numnondigits = 2;
4245 result = val->ob_type->tp_as_number->nb_hex(val);
4246 break;
4247 default:
4248 assert(!"'type' not in [duoxX]");
4249 }
4250 if (!result)
4251 return NULL;
4252
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004253 buf = PyString_AsString(result);
4254 if (!buf) {
4255 Py_DECREF(result);
4256 return NULL;
4257 }
4258
Tim Peters38fd5b62000-09-21 05:43:11 +00004259 /* To modify the string in-place, there can only be one reference. */
4260 if (result->ob_refcnt != 1) {
4261 PyErr_BadInternalCall();
4262 return NULL;
4263 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004264 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004265 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004266 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4267 return NULL;
4268 }
4269 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004270 if (buf[len-1] == 'L') {
4271 --len;
4272 buf[len] = '\0';
4273 }
4274 sign = buf[0] == '-';
4275 numnondigits += sign;
4276 numdigits = len - numnondigits;
4277 assert(numdigits > 0);
4278
Tim Petersfff53252001-04-12 18:38:48 +00004279 /* Get rid of base marker unless F_ALT */
4280 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004281 /* Need to skip 0x, 0X or 0. */
4282 int skipped = 0;
4283 switch (type) {
4284 case 'o':
4285 assert(buf[sign] == '0');
4286 /* If 0 is only digit, leave it alone. */
4287 if (numdigits > 1) {
4288 skipped = 1;
4289 --numdigits;
4290 }
4291 break;
4292 case 'x':
4293 case 'X':
4294 assert(buf[sign] == '0');
4295 assert(buf[sign + 1] == 'x');
4296 skipped = 2;
4297 numnondigits -= 2;
4298 break;
4299 }
4300 if (skipped) {
4301 buf += skipped;
4302 len -= skipped;
4303 if (sign)
4304 buf[0] = '-';
4305 }
4306 assert(len == numnondigits + numdigits);
4307 assert(numdigits > 0);
4308 }
4309
4310 /* Fill with leading zeroes to meet minimum width. */
4311 if (prec > numdigits) {
4312 PyObject *r1 = PyString_FromStringAndSize(NULL,
4313 numnondigits + prec);
4314 char *b1;
4315 if (!r1) {
4316 Py_DECREF(result);
4317 return NULL;
4318 }
4319 b1 = PyString_AS_STRING(r1);
4320 for (i = 0; i < numnondigits; ++i)
4321 *b1++ = *buf++;
4322 for (i = 0; i < prec - numdigits; i++)
4323 *b1++ = '0';
4324 for (i = 0; i < numdigits; i++)
4325 *b1++ = *buf++;
4326 *b1 = '\0';
4327 Py_DECREF(result);
4328 result = r1;
4329 buf = PyString_AS_STRING(result);
4330 len = numnondigits + prec;
4331 }
4332
4333 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004334 if (type == 'X') {
4335 /* Need to convert all lower case letters to upper case.
4336 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004337 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004338 if (buf[i] >= 'a' && buf[i] <= 'x')
4339 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004340 }
4341 *pbuf = buf;
4342 *plen = len;
4343 return result;
4344}
4345
Thomas Wouters477c8d52006-05-27 19:21:47 +00004346Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004347formatint(char *buf, size_t buflen, int flags,
4348 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004349{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004350 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004351 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4352 + 1 + 1 = 24 */
4353 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004354 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004355 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004356
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004357 x = PyInt_AsLong(v);
4358 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004359 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4360 v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004361 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004362 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004363 if (x < 0 && type == 'u') {
4364 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004365 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004366 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4367 sign = "-";
4368 else
4369 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004370 if (prec < 0)
4371 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004372
4373 if ((flags & F_ALT) &&
4374 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004375 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004376 * of issues that cause pain:
4377 * - when 0 is being converted, the C standard leaves off
4378 * the '0x' or '0X', which is inconsistent with other
4379 * %#x/%#X conversions and inconsistent with Python's
4380 * hex() function
4381 * - there are platforms that violate the standard and
4382 * convert 0 with the '0x' or '0X'
4383 * (Metrowerks, Compaq Tru64)
4384 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004385 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004386 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004387 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004388 * We can achieve the desired consistency by inserting our
4389 * own '0x' or '0X' prefix, and substituting %x/%X in place
4390 * of %#x/%#X.
4391 *
4392 * Note that this is the same approach as used in
4393 * formatint() in unicodeobject.c
4394 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004395 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4396 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004397 }
4398 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004399 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4400 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004401 prec, type);
4402 }
4403
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004404 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4405 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004406 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004407 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004408 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004409 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004410 return -1;
4411 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004412 if (sign[0])
4413 PyOS_snprintf(buf, buflen, fmt, -x);
4414 else
4415 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004416 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004417}
4418
Thomas Wouters477c8d52006-05-27 19:21:47 +00004419Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004420formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004421{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004422 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004423 if (PyString_Check(v)) {
4424 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004425 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004426 }
4427 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004428 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004429 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004430 }
4431 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004432 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004433}
4434
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the cast cannot bind to a
   neighbouring operator at the point of use (e.g. sizeof FORMATBUFLEN).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004444
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004445PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004446PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004447{
4448 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004449 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004450 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004451 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004452 PyObject *result, *orig_args;
4453#ifdef Py_USING_UNICODE
4454 PyObject *v, *w;
4455#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004456 PyObject *dict = NULL;
4457 if (format == NULL || !PyString_Check(format) || args == NULL) {
4458 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004459 return NULL;
4460 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004461 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004462 fmt = PyString_AS_STRING(format);
4463 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004464 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004465 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004466 if (result == NULL)
4467 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004468 res = PyString_AsString(result);
4469 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004470 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004471 argidx = 0;
4472 }
4473 else {
4474 arglen = -1;
4475 argidx = -2;
4476 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004477 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4478 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004479 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004480 while (--fmtcnt >= 0) {
4481 if (*fmt != '%') {
4482 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004483 rescnt = fmtcnt + 100;
4484 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004485 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004486 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004487 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004488 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004489 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004490 }
4491 *res++ = *fmt++;
4492 }
4493 else {
4494 /* Got a format specifier */
4495 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004496 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004497 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004498 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004499 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004500 PyObject *v = NULL;
4501 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004502 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004503 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004504 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004505 char formatbuf[FORMATBUFLEN];
4506 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004507#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004508 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004509 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004510#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004511
Guido van Rossumda9c2711996-12-05 21:58:58 +00004512 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004513 if (*fmt == '(') {
4514 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004515 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004516 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004517 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004518
4519 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004521 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004522 goto error;
4523 }
4524 ++fmt;
4525 --fmtcnt;
4526 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004527 /* Skip over balanced parentheses */
4528 while (pcount > 0 && --fmtcnt >= 0) {
4529 if (*fmt == ')')
4530 --pcount;
4531 else if (*fmt == '(')
4532 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004533 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004534 }
4535 keylen = fmt - keystart - 1;
4536 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004537 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004538 "incomplete format key");
4539 goto error;
4540 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004541 key = PyString_FromStringAndSize(keystart,
4542 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004543 if (key == NULL)
4544 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004545 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004546 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004547 args_owned = 0;
4548 }
4549 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004550 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004551 if (args == NULL) {
4552 goto error;
4553 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004554 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004555 arglen = -1;
4556 argidx = -2;
4557 }
Guido van Rossume5372401993-03-16 12:15:04 +00004558 while (--fmtcnt >= 0) {
4559 switch (c = *fmt++) {
4560 case '-': flags |= F_LJUST; continue;
4561 case '+': flags |= F_SIGN; continue;
4562 case ' ': flags |= F_BLANK; continue;
4563 case '#': flags |= F_ALT; continue;
4564 case '0': flags |= F_ZERO; continue;
4565 }
4566 break;
4567 }
4568 if (c == '*') {
4569 v = getnextarg(args, arglen, &argidx);
4570 if (v == NULL)
4571 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004572 if (!PyInt_Check(v)) {
4573 PyErr_SetString(PyExc_TypeError,
4574 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004575 goto error;
4576 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004577 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004578 if (width == -1 && PyErr_Occurred())
4579 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004580 if (width < 0) {
4581 flags |= F_LJUST;
4582 width = -width;
4583 }
Guido van Rossume5372401993-03-16 12:15:04 +00004584 if (--fmtcnt >= 0)
4585 c = *fmt++;
4586 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004587 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004588 width = c - '0';
4589 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004590 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004591 if (!isdigit(c))
4592 break;
4593 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004594 PyErr_SetString(
4595 PyExc_ValueError,
4596 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004597 goto error;
4598 }
4599 width = width*10 + (c - '0');
4600 }
4601 }
4602 if (c == '.') {
4603 prec = 0;
4604 if (--fmtcnt >= 0)
4605 c = *fmt++;
4606 if (c == '*') {
4607 v = getnextarg(args, arglen, &argidx);
4608 if (v == NULL)
4609 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004610 if (!PyInt_Check(v)) {
4611 PyErr_SetString(
4612 PyExc_TypeError,
4613 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004614 goto error;
4615 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004616 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004617 if (prec == -1 && PyErr_Occurred())
4618 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004619 if (prec < 0)
4620 prec = 0;
4621 if (--fmtcnt >= 0)
4622 c = *fmt++;
4623 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004624 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004625 prec = c - '0';
4626 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004627 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004628 if (!isdigit(c))
4629 break;
4630 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004631 PyErr_SetString(
4632 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004633 "prec too big");
4634 goto error;
4635 }
4636 prec = prec*10 + (c - '0');
4637 }
4638 }
4639 } /* prec */
4640 if (fmtcnt >= 0) {
4641 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004642 if (--fmtcnt >= 0)
4643 c = *fmt++;
4644 }
4645 }
4646 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004647 PyErr_SetString(PyExc_ValueError,
4648 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004649 goto error;
4650 }
4651 if (c != '%') {
4652 v = getnextarg(args, arglen, &argidx);
4653 if (v == NULL)
4654 goto error;
4655 }
4656 sign = 0;
4657 fill = ' ';
4658 switch (c) {
4659 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004660 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004661 len = 1;
4662 break;
4663 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004664#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004665 if (PyUnicode_Check(v)) {
4666 fmt = fmt_start;
4667 argidx = argidx_start;
4668 goto unicode;
4669 }
Georg Brandld45014b2005-10-01 17:06:00 +00004670#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004671 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004672#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004673 if (temp != NULL && PyUnicode_Check(temp)) {
4674 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004675 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004676 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004677 goto unicode;
4678 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004679#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004680 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004681 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004682 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004683 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004684 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004685 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004686 if (!PyString_Check(temp)) {
4687 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004688 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004689 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004690 goto error;
4691 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004692 pbuf = PyString_AS_STRING(temp);
4693 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004694 if (prec >= 0 && len > prec)
4695 len = prec;
4696 break;
4697 case 'i':
4698 case 'd':
4699 case 'u':
4700 case 'o':
4701 case 'x':
4702 case 'X':
4703 if (c == 'i')
4704 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004705 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004706 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004707 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004708 prec, c, &pbuf, &ilen);
4709 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004710 if (!temp)
4711 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004712 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004713 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004714 else {
4715 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004716 len = formatint(pbuf,
4717 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004718 flags, prec, c, v);
4719 if (len < 0)
4720 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004721 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004722 }
4723 if (flags & F_ZERO)
4724 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004725 break;
4726 case 'e':
4727 case 'E':
4728 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004729 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004730 case 'g':
4731 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004732 if (c == 'F')
4733 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004734 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004735 len = formatfloat(pbuf, sizeof(formatbuf),
4736 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004737 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004738 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004739 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004740 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004741 fill = '0';
4742 break;
4743 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004744#ifdef Py_USING_UNICODE
4745 if (PyUnicode_Check(v)) {
4746 fmt = fmt_start;
4747 argidx = argidx_start;
4748 goto unicode;
4749 }
4750#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004751 pbuf = formatbuf;
4752 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004753 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004754 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004755 break;
4756 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004757 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004758 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004759 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004760 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004761 (Py_ssize_t)(fmt - 1 -
4762 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004763 goto error;
4764 }
4765 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004766 if (*pbuf == '-' || *pbuf == '+') {
4767 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004768 len--;
4769 }
4770 else if (flags & F_SIGN)
4771 sign = '+';
4772 else if (flags & F_BLANK)
4773 sign = ' ';
4774 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004775 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004776 }
4777 if (width < len)
4778 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004779 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004780 reslen -= rescnt;
4781 rescnt = width + fmtcnt + 100;
4782 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004783 if (reslen < 0) {
4784 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004785 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004786 return PyErr_NoMemory();
4787 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004788 if (_PyString_Resize(&result, reslen) < 0) {
4789 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004790 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004791 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004792 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004793 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004794 }
4795 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004796 if (fill != ' ')
4797 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004798 rescnt--;
4799 if (width > len)
4800 width--;
4801 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004802 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4803 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004804 assert(pbuf[1] == c);
4805 if (fill != ' ') {
4806 *res++ = *pbuf++;
4807 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004808 }
Tim Petersfff53252001-04-12 18:38:48 +00004809 rescnt -= 2;
4810 width -= 2;
4811 if (width < 0)
4812 width = 0;
4813 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004814 }
4815 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004816 do {
4817 --rescnt;
4818 *res++ = fill;
4819 } while (--width > len);
4820 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004821 if (fill == ' ') {
4822 if (sign)
4823 *res++ = sign;
4824 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004825 (c == 'x' || c == 'X')) {
4826 assert(pbuf[0] == '0');
4827 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004828 *res++ = *pbuf++;
4829 *res++ = *pbuf++;
4830 }
4831 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004832 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004833 res += len;
4834 rescnt -= len;
4835 while (--width >= len) {
4836 --rescnt;
4837 *res++ = ' ';
4838 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004839 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004840 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004841 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004842 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004843 goto error;
4844 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004845 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004846 } /* '%' */
4847 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004848 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004849 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004850 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004851 goto error;
4852 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004853 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004854 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004855 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004856 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004857 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004858
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004859#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004860 unicode:
4861 if (args_owned) {
4862 Py_DECREF(args);
4863 args_owned = 0;
4864 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004865 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004866 if (PyTuple_Check(orig_args) && argidx > 0) {
4867 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004868 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004869 v = PyTuple_New(n);
4870 if (v == NULL)
4871 goto error;
4872 while (--n >= 0) {
4873 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4874 Py_INCREF(w);
4875 PyTuple_SET_ITEM(v, n, w);
4876 }
4877 args = v;
4878 } else {
4879 Py_INCREF(orig_args);
4880 args = orig_args;
4881 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004882 args_owned = 1;
4883 /* Take what we have of the result and let the Unicode formatting
4884 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004885 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004886 if (_PyString_Resize(&result, rescnt))
4887 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004888 fmtcnt = PyString_GET_SIZE(format) - \
4889 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004890 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4891 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004892 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004893 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004894 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004895 if (v == NULL)
4896 goto error;
4897 /* Paste what we have (result) to what the Unicode formatting
4898 function returned (v) and return the result (or error) */
4899 w = PyUnicode_Concat(result, v);
4900 Py_DECREF(result);
4901 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004902 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004903 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004904#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004905
Guido van Rossume5372401993-03-16 12:15:04 +00004906 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004907 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004908 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004909 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004910 }
Guido van Rossume5372401993-03-16 12:15:04 +00004911 return NULL;
4912}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004913
Guido van Rossum2a61e741997-01-18 07:55:05 +00004914void
Fred Drakeba096332000-07-09 07:04:36 +00004915PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004916{
4917 register PyStringObject *s = (PyStringObject *)(*p);
4918 PyObject *t;
4919 if (s == NULL || !PyString_Check(s))
4920 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004921 /* If it's a string subclass, we don't really know what putting
4922 it in the interned dict might do. */
4923 if (!PyString_CheckExact(s))
4924 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004925 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004926 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004927 if (interned == NULL) {
4928 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004929 if (interned == NULL) {
4930 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004931 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004932 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004933 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004934 t = PyDict_GetItem(interned, (PyObject *)s);
4935 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004936 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004937 Py_DECREF(*p);
4938 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004939 return;
4940 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004941
Armin Rigo79f7ad22004-08-07 19:27:39 +00004942 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004943 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004944 return;
4945 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004946 /* The two references in interned are not counted by refcnt.
4947 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004948 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004949 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004950}
4951
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004952void
4953PyString_InternImmortal(PyObject **p)
4954{
4955 PyString_InternInPlace(p);
4956 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4957 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4958 Py_INCREF(*p);
4959 }
4960}
4961
Guido van Rossum2a61e741997-01-18 07:55:05 +00004962
4963PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004964PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004965{
4966 PyObject *s = PyString_FromString(cp);
4967 if (s == NULL)
4968 return NULL;
4969 PyString_InternInPlace(&s);
4970 return s;
4971}
4972
Guido van Rossum8cf04761997-08-02 02:57:45 +00004973void
Fred Drakeba096332000-07-09 07:04:36 +00004974PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004975{
4976 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004977 for (i = 0; i < UCHAR_MAX + 1; i++) {
4978 Py_XDECREF(characters[i]);
4979 characters[i] = NULL;
4980 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004981 Py_XDECREF(nullstring);
4982 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004983}
Barry Warsawa903ad982001-02-23 16:40:48 +00004984
Barry Warsawa903ad982001-02-23 16:40:48 +00004985void _Py_ReleaseInternedStrings(void)
4986{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004987 PyObject *keys;
4988 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004989 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004990 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004991
4992 if (interned == NULL || !PyDict_Check(interned))
4993 return;
4994 keys = PyDict_Keys(interned);
4995 if (keys == NULL || !PyList_Check(keys)) {
4996 PyErr_Clear();
4997 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004998 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004999
5000 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5001 detector, interned strings are not forcibly deallocated; rather, we
5002 give them their stolen references back, and then clear and DECREF
5003 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00005004
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005005 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00005006 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5007 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005008 for (i = 0; i < n; i++) {
5009 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5010 switch (s->ob_sstate) {
5011 case SSTATE_NOT_INTERNED:
5012 /* XXX Shouldn't happen */
5013 break;
5014 case SSTATE_INTERNED_IMMORTAL:
5015 s->ob_refcnt += 1;
Thomas Wouters27d517b2007-02-25 20:39:11 +00005016 immortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005017 break;
5018 case SSTATE_INTERNED_MORTAL:
5019 s->ob_refcnt += 2;
Thomas Wouters27d517b2007-02-25 20:39:11 +00005020 mortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005021 break;
5022 default:
5023 Py_FatalError("Inconsistent interned string state.");
5024 }
5025 s->ob_sstate = SSTATE_NOT_INTERNED;
5026 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00005027 fprintf(stderr, "total size of all interned strings: "
5028 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5029 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005030 Py_DECREF(keys);
5031 PyDict_Clear(interned);
5032 Py_DECREF(interned);
5033 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005034}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005035
5036
5037/*********************** Str Iterator ****************************/
5038
5039typedef struct {
5040 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00005041 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005042 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
5043} striterobject;
5044
5045static void
5046striter_dealloc(striterobject *it)
5047{
5048 _PyObject_GC_UNTRACK(it);
5049 Py_XDECREF(it->it_seq);
5050 PyObject_GC_Del(it);
5051}
5052
5053static int
5054striter_traverse(striterobject *it, visitproc visit, void *arg)
5055{
5056 Py_VISIT(it->it_seq);
5057 return 0;
5058}
5059
5060static PyObject *
5061striter_next(striterobject *it)
5062{
5063 PyStringObject *seq;
5064 PyObject *item;
5065
5066 assert(it != NULL);
5067 seq = it->it_seq;
5068 if (seq == NULL)
5069 return NULL;
5070 assert(PyString_Check(seq));
5071
5072 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005073 item = PyString_FromStringAndSize(
5074 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005075 if (item != NULL)
5076 ++it->it_index;
5077 return item;
5078 }
5079
5080 Py_DECREF(seq);
5081 it->it_seq = NULL;
5082 return NULL;
5083}
5084
5085static PyObject *
5086striter_len(striterobject *it)
5087{
5088 Py_ssize_t len = 0;
5089 if (it->it_seq)
5090 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5091 return PyInt_FromSsize_t(len);
5092}
5093
Guido van Rossum49d6b072006-08-17 21:11:47 +00005094PyDoc_STRVAR(length_hint_doc,
5095 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005096
5097static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005098 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
5099 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005100 {NULL, NULL} /* sentinel */
5101};
5102
5103PyTypeObject PyStringIter_Type = {
5104 PyObject_HEAD_INIT(&PyType_Type)
5105 0, /* ob_size */
Guido van Rossum49d6b072006-08-17 21:11:47 +00005106 "striterator", /* tp_name */
5107 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005108 0, /* tp_itemsize */
5109 /* methods */
5110 (destructor)striter_dealloc, /* tp_dealloc */
5111 0, /* tp_print */
5112 0, /* tp_getattr */
5113 0, /* tp_setattr */
5114 0, /* tp_compare */
5115 0, /* tp_repr */
5116 0, /* tp_as_number */
5117 0, /* tp_as_sequence */
5118 0, /* tp_as_mapping */
5119 0, /* tp_hash */
5120 0, /* tp_call */
5121 0, /* tp_str */
5122 PyObject_GenericGetAttr, /* tp_getattro */
5123 0, /* tp_setattro */
5124 0, /* tp_as_buffer */
5125 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5126 0, /* tp_doc */
5127 (traverseproc)striter_traverse, /* tp_traverse */
5128 0, /* tp_clear */
5129 0, /* tp_richcompare */
5130 0, /* tp_weaklistoffset */
5131 PyObject_SelfIter, /* tp_iter */
5132 (iternextfunc)striter_next, /* tp_iternext */
5133 striter_methods, /* tp_methods */
5134 0,
5135};
5136
5137static PyObject *
5138str_iter(PyObject *seq)
5139{
5140 striterobject *it;
5141
5142 if (!PyString_Check(seq)) {
5143 PyErr_BadInternalCall();
5144 return NULL;
5145 }
5146 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5147 if (it == NULL)
5148 return NULL;
5149 it->it_index = 0;
5150 Py_INCREF(seq);
5151 it->it_seq = (PyStringObject *)seq;
5152 _PyObject_GC_TRACK(it);
5153 return (PyObject *)it;
5154}