blob: 6156b2f046b147baa61f00dace7c61e35f3b2c74 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
406 /* Convert Unicode to a string using the default encoding */
407 if (PyUnicode_Check(v)) {
408 PyObject *temp = v;
409 v = PyUnicode_AsEncodedString(v, NULL, NULL);
410 Py_DECREF(temp);
411 if (v == NULL)
412 goto onError;
413 }
414 if (!PyString_Check(v)) {
415 PyErr_Format(PyExc_TypeError,
416 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000417 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000418 Py_DECREF(v);
419 goto onError;
420 }
421
422 return v;
423
424 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 return NULL;
426}
427
428PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000429 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 const char *encoding,
431 const char *errors)
432{
433 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000434
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435 str = PyString_FromStringAndSize(s, size);
436 if (str == NULL)
437 return NULL;
438 v = PyString_AsEncodedString(str, encoding, errors);
439 Py_DECREF(str);
440 return v;
441}
442
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000443PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444 const char *encoding,
445 const char *errors)
446{
447 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000448
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 if (!PyString_Check(str)) {
450 PyErr_BadArgument();
451 goto onError;
452 }
453
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000457
458 /* Encode via the codec registry */
459 v = PyCodec_Encode(str, encoding, errors);
460 if (v == NULL)
461 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462
463 return v;
464
465 onError:
466 return NULL;
467}
468
469PyObject *PyString_AsEncodedString(PyObject *str,
470 const char *encoding,
471 const char *errors)
472{
473 PyObject *v;
474
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000475 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000476 if (v == NULL)
477 goto onError;
478
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 /* Convert Unicode to a string using the default encoding */
480 if (PyUnicode_Check(v)) {
481 PyObject *temp = v;
482 v = PyUnicode_AsEncodedString(v, NULL, NULL);
483 Py_DECREF(temp);
484 if (v == NULL)
485 goto onError;
486 }
487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000490 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000504 switch (PyString_CHECK_INTERNED(op)) {
505 case SSTATE_NOT_INTERNED:
506 break;
507
508 case SSTATE_INTERNED_MORTAL:
509 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000510 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000511 if (PyDict_DelItem(interned, op) != 0)
512 Py_FatalError(
513 "deletion of interned string failed");
514 break;
515
516 case SSTATE_INTERNED_IMMORTAL:
517 Py_FatalError("Immortal interned string died.");
518
519 default:
520 Py_FatalError("Inconsistent interned string state.");
521 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000523}
524
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000525/* Unescape a backslash-escaped string. If unicode is non-zero,
526 the string is a u-literal. If recode_encoding is non-zero,
527 the string is UTF-8 encoded and should be re-encoded in the
528 specified encoding. */
529
530PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000533 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 const char *recode_encoding)
535{
536 int c;
537 char *p, *buf;
538 const char *end;
539 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000541 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000542 if (v == NULL)
543 return NULL;
544 p = buf = PyString_AsString(v);
545 end = s + len;
546 while (s < end) {
547 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000548 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000549 if (recode_encoding && (*s & 0x80)) {
550 PyObject *u, *w;
551 char *r;
552 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 t = s;
555 /* Decode non-ASCII bytes as UTF-8. */
556 while (t < end && (*t & 0x80)) t++;
557 u = PyUnicode_DecodeUTF8(s, t - s, errors);
558 if(!u) goto failed;
559
560 /* Recode them in target encoding. */
561 w = PyUnicode_AsEncodedString(
562 u, recode_encoding, errors);
563 Py_DECREF(u);
564 if (!w) goto failed;
565
566 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 assert(PyString_Check(w));
568 r = PyString_AS_STRING(w);
569 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000570 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000571 p += rn;
572 Py_DECREF(w);
573 s = t;
574 } else {
575 *p++ = *s++;
576 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577 continue;
578 }
579 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000580 if (s==end) {
581 PyErr_SetString(PyExc_ValueError,
582 "Trailing \\ in string");
583 goto failed;
584 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 switch (*s++) {
586 /* XXX This assumes ASCII! */
587 case '\n': break;
588 case '\\': *p++ = '\\'; break;
589 case '\'': *p++ = '\''; break;
590 case '\"': *p++ = '\"'; break;
591 case 'b': *p++ = '\b'; break;
592 case 'f': *p++ = '\014'; break; /* FF */
593 case 't': *p++ = '\t'; break;
594 case 'n': *p++ = '\n'; break;
595 case 'r': *p++ = '\r'; break;
596 case 'v': *p++ = '\013'; break; /* VT */
597 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
598 case '0': case '1': case '2': case '3':
599 case '4': case '5': case '6': case '7':
600 c = s[-1] - '0';
601 if ('0' <= *s && *s <= '7') {
602 c = (c<<3) + *s++ - '0';
603 if ('0' <= *s && *s <= '7')
604 c = (c<<3) + *s++ - '0';
605 }
606 *p++ = c;
607 break;
608 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000609 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000610 && isxdigit(Py_CHARMASK(s[1]))) {
611 unsigned int x = 0;
612 c = Py_CHARMASK(*s);
613 s++;
614 if (isdigit(c))
615 x = c - '0';
616 else if (islower(c))
617 x = 10 + c - 'a';
618 else
619 x = 10 + c - 'A';
620 x = x << 4;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x += c - '0';
625 else if (islower(c))
626 x += 10 + c - 'a';
627 else
628 x += 10 + c - 'A';
629 *p++ = x;
630 break;
631 }
632 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000633 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000635 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 }
637 if (strcmp(errors, "replace") == 0) {
638 *p++ = '?';
639 } else if (strcmp(errors, "ignore") == 0)
640 /* do nothing */;
641 else {
642 PyErr_Format(PyExc_ValueError,
643 "decoding error; "
644 "unknown error handling code: %.400s",
645 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000646 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 default:
649 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000650 s--;
651 goto non_esc; /* an arbitry number of unescaped
652 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 }
654 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000655 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 return v;
658 failed:
659 Py_DECREF(v);
660 return NULL;
661}
662
/* -------------------------------------------------------------------- */
/* object api */
665
Martin v. Löwis18e16552006-02-15 17:27:45 +0000666static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667string_getsize(register PyObject *op)
668{
669 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671 if (PyString_AsStringAndSize(op, &s, &len))
672 return -1;
673 return len;
674}
675
676static /*const*/ char *
677string_getbuffer(register PyObject *op)
678{
679 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000680 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000681 if (PyString_AsStringAndSize(op, &s, &len))
682 return NULL;
683 return s;
684}
685
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000687PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000688{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000689 if (PyUnicode_Check(op)) {
690 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
691 if (!op)
692 return -1;
693 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694 if (!PyString_Check(op))
695 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000696 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
699/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000700PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000702 if (PyUnicode_Check(op)) {
703 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
704 if (!op)
705 return NULL;
706 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (!PyString_Check(op))
708 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710}
711
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712int
713PyString_AsStringAndSize(register PyObject *obj,
714 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716{
717 if (s == NULL) {
718 PyErr_BadInternalCall();
719 return -1;
720 }
721
722 if (!PyString_Check(obj)) {
723 if (PyUnicode_Check(obj)) {
724 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
725 if (obj == NULL)
726 return -1;
727 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000728 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000731 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000732 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733 return -1;
734 }
735 }
736
737 *s = PyString_AS_STRING(obj);
738 if (len != NULL)
739 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000740 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000741 PyErr_SetString(PyExc_TypeError,
742 "expected string without null bytes");
743 return -1;
744 }
745 return 0;
746}
747
Thomas Wouters477c8d52006-05-27 19:21:47 +0000748/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749/* Methods */
750
Thomas Wouters477c8d52006-05-27 19:21:47 +0000751#define STRINGLIB_CHAR char
752
753#define STRINGLIB_CMP memcmp
754#define STRINGLIB_LEN PyString_GET_SIZE
755#define STRINGLIB_NEW PyString_FromStringAndSize
756#define STRINGLIB_STR PyString_AS_STRING
757
758#define STRINGLIB_EMPTY nullstring
759
760#include "stringlib/fastsearch.h"
761
762#include "stringlib/count.h"
763#include "stringlib/find.h"
764#include "stringlib/partition.h"
765
766
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000767static int
Fred Drakeba096332000-07-09 07:04:36 +0000768string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000770 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000772 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000773
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000774 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000775 if (! PyString_CheckExact(op)) {
776 int ret;
777 /* A str subclass may have its own __str__ method. */
778 op = (PyStringObject *) PyObject_Str((PyObject *)op);
779 if (op == NULL)
780 return -1;
781 ret = string_print(op, fp, flags);
782 Py_DECREF(op);
783 return ret;
784 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000785 if (flags & Py_PRINT_RAW) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000786 char *data = op->ob_sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000787 Py_ssize_t size = Py_Size(op);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000788 while (size > INT_MAX) {
789 /* Very long strings cannot be written atomically.
790 * But don't write exactly INT_MAX bytes at a time
791 * to avoid memory aligment issues.
792 */
793 const int chunk_size = INT_MAX & ~0x3FFF;
794 fwrite(data, 1, chunk_size, fp);
795 data += chunk_size;
796 size -= chunk_size;
797 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000798#ifdef __VMS
Thomas Wouters89f507f2006-12-13 04:49:30 +0000799 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000800#else
Thomas Wouters89f507f2006-12-13 04:49:30 +0000801 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000802#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000803 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000805
Thomas Wouters7e474022000-07-16 12:04:32 +0000806 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000807 quote = '\'';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000808 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
809 !memchr(op->ob_sval, '"', Py_Size(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000810 quote = '"';
811
812 fputc(quote, fp);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000813 for (i = 0; i < Py_Size(op); i++) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000816 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000817 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000818 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000819 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000820 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000821 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000822 fprintf(fp, "\\r");
823 else if (c < ' ' || c >= 0x7f)
824 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000825 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000826 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000829 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830}
831
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000832PyObject *
833PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000835 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000836 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +0000837 Py_ssize_t length = PyString_GET_SIZE(op);
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000838 size_t newsize = 3 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000839 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000840 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000841 PyErr_SetString(PyExc_OverflowError,
842 "string is too large to make repr");
843 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000844 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000846 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 }
848 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000849 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000850 register Py_UNICODE c;
851 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000852 int quote;
853
Thomas Wouters7e474022000-07-16 12:04:32 +0000854 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000856 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000857 char *test, *start;
858 start = PyString_AS_STRING(op);
859 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000860 if (*test == '"') {
861 quote = '\''; /* switch back to single quote */
862 goto decided;
863 }
864 else if (*test == '\'')
865 quote = '"';
866 }
867 decided:
868 ;
869 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000870
Guido van Rossum7611d1d2007-06-15 00:00:12 +0000871 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000872 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000873 /* There's at least enough room for a hex escape
874 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000875 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000877 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000879 else if (c == '\t')
880 *p++ = '\\', *p++ = 't';
881 else if (c == '\n')
882 *p++ = '\\', *p++ = 'n';
883 else if (c == '\r')
884 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000885 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000886 *p++ = '\\';
887 *p++ = 'x';
888 *p++ = hexdigits[(c & 0xf0) >> 4];
889 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000890 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000891 else
892 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000894 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000895 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000897 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
898 Py_DECREF(v);
899 return NULL;
900 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000901 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903}
904
Guido van Rossum189f1df2001-05-01 16:51:53 +0000905static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000906string_repr(PyObject *op)
907{
908 return PyString_Repr(op, 1);
909}
910
911static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000912string_str(PyObject *s)
913{
Tim Petersc9933152001-10-16 20:18:24 +0000914 assert(PyString_Check(s));
915 if (PyString_CheckExact(s)) {
916 Py_INCREF(s);
917 return s;
918 }
919 else {
920 /* Subtype -- return genuine string with the same value. */
921 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000922 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +0000923 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000924}
925
Martin v. Löwis18e16552006-02-15 17:27:45 +0000926static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000927string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000929 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930}
931
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000933string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000935 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000936 register PyStringObject *op;
937 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000938 if (PyUnicode_Check(bb))
939 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000940 if (PyBytes_Check(bb))
941 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000942 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000943 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000944 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 return NULL;
946 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000949 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000950 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000951 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000952 Py_INCREF(bb);
953 return bb;
954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 Py_INCREF(a);
956 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000958 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000959 if (size < 0) {
960 PyErr_SetString(PyExc_OverflowError,
961 "strings are too large to concat");
962 return NULL;
963 }
964
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000965 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000966 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000967 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000968 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000969 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000970 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000971 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000972 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
973 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000974 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000976#undef b
977}
978
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000979static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000980string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000982 register Py_ssize_t i;
983 register Py_ssize_t j;
984 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000986 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987 if (n < 0)
988 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000989 /* watch out for overflows: the size can overflow int,
990 * and the # of bytes needed can overflow size_t
991 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000992 size = Py_Size(a) * n;
993 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000994 PyErr_SetString(PyExc_OverflowError,
995 "repeated string is too long");
996 return NULL;
997 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000998 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 Py_INCREF(a);
1000 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001 }
Tim Peterse7c05322004-06-27 17:24:49 +00001002 nbytes = (size_t)size;
1003 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001004 PyErr_SetString(PyExc_OverflowError,
1005 "repeated string is too long");
1006 return NULL;
1007 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001009 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001010 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001012 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001013 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001014 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001015 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001016 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001017 memset(op->ob_sval, a->ob_sval[0] , n);
1018 return (PyObject *) op;
1019 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001020 i = 0;
1021 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001022 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1023 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001024 }
1025 while (i < size) {
1026 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001027 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001028 i += j;
1029 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001030 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001031}
1032
1033/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1034
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001036string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001037 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001038 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001039{
1040 if (i < 0)
1041 i = 0;
1042 if (j < 0)
1043 j = 0; /* Avoid signed/unsigned bug in next line */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001044 if (j > Py_Size(a))
1045 j = Py_Size(a);
1046 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001047 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001048 Py_INCREF(a);
1049 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050 }
1051 if (j < i)
1052 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001053 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001054}
1055
Guido van Rossum9284a572000-03-07 15:53:43 +00001056static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001057string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001058{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001059 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001060 if (PyUnicode_Check(sub_obj))
1061 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001062 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001063 PyErr_Format(PyExc_TypeError,
1064 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001065 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001066 return -1;
1067 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001068 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001069
Thomas Wouters477c8d52006-05-27 19:21:47 +00001070 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001071}
1072
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001073static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001074string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001075{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001076 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001077 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001078 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001079 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001080 return NULL;
1081 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001082 pchar = a->ob_sval[i];
1083 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001084 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001085 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001086 else {
1087#ifdef COUNT_ALLOCS
1088 one_strings++;
1089#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001090 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001091 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001092 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093}
1094
Martin v. Löwiscd353062001-05-24 16:56:35 +00001095static PyObject*
1096string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001099 Py_ssize_t len_a, len_b;
1100 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001101 PyObject *result;
1102
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001103 /* Make sure both arguments are strings. */
1104 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001105 result = Py_NotImplemented;
1106 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001107 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108 if (a == b) {
1109 switch (op) {
1110 case Py_EQ:case Py_LE:case Py_GE:
1111 result = Py_True;
1112 goto out;
1113 case Py_NE:case Py_LT:case Py_GT:
1114 result = Py_False;
1115 goto out;
1116 }
1117 }
1118 if (op == Py_EQ) {
1119 /* Supporting Py_NE here as well does not save
1120 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001121 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001122 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001123 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001124 result = Py_True;
1125 } else {
1126 result = Py_False;
1127 }
1128 goto out;
1129 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001130 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001131 min_len = (len_a < len_b) ? len_a : len_b;
1132 if (min_len > 0) {
1133 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1134 if (c==0)
1135 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001136 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001137 c = 0;
1138 if (c == 0)
1139 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1140 switch (op) {
1141 case Py_LT: c = c < 0; break;
1142 case Py_LE: c = c <= 0; break;
1143 case Py_EQ: assert(0); break; /* unreachable */
1144 case Py_NE: c = c != 0; break;
1145 case Py_GT: c = c > 0; break;
1146 case Py_GE: c = c >= 0; break;
1147 default:
1148 result = Py_NotImplemented;
1149 goto out;
1150 }
1151 result = c ? Py_True : Py_False;
1152 out:
1153 Py_INCREF(result);
1154 return result;
1155}
1156
1157int
1158_PyString_Eq(PyObject *o1, PyObject *o2)
1159{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001160 PyStringObject *a = (PyStringObject*) o1;
1161 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001162 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001163 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001164 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001165}
1166
Guido van Rossum9bfef441993-03-29 10:43:31 +00001167static long
Fred Drakeba096332000-07-09 07:04:36 +00001168string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001169{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001170 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001171 register unsigned char *p;
1172 register long x;
1173
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001174 if (a->ob_shash != -1)
1175 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001176 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001177 p = (unsigned char *) a->ob_sval;
1178 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001179 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001180 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001181 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001182 if (x == -1)
1183 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001185 return x;
1186}
1187
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001188static PyObject*
1189string_subscript(PyStringObject* self, PyObject* item)
1190{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001191 if (PyIndex_Check(item)) {
1192 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001193 if (i == -1 && PyErr_Occurred())
1194 return NULL;
1195 if (i < 0)
1196 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001197 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001198 }
1199 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001200 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201 char* source_buf;
1202 char* result_buf;
1203 PyObject* result;
1204
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001205 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 PyString_GET_SIZE(self),
1207 &start, &stop, &step, &slicelength) < 0) {
1208 return NULL;
1209 }
1210
1211 if (slicelength <= 0) {
1212 return PyString_FromStringAndSize("", 0);
1213 }
1214 else {
1215 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001216 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001217 if (result_buf == NULL)
1218 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001220 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001221 cur += step, i++) {
1222 result_buf[i] = source_buf[cur];
1223 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001224
1225 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001226 slicelength);
1227 PyMem_Free(result_buf);
1228 return result;
1229 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001230 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001231 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001232 PyErr_Format(PyExc_TypeError,
1233 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001234 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001235 return NULL;
1236 }
1237}
1238
Martin v. Löwis18e16552006-02-15 17:27:45 +00001239static Py_ssize_t
1240string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001241{
1242 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001243 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001244 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001245 return -1;
1246 }
1247 *ptr = (void *)self->ob_sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001248 return Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001249}
1250
Martin v. Löwis18e16552006-02-15 17:27:45 +00001251static Py_ssize_t
1252string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001253{
Guido van Rossum045e6881997-09-08 18:30:11 +00001254 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001255 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001256 return -1;
1257}
1258
Martin v. Löwis18e16552006-02-15 17:27:45 +00001259static Py_ssize_t
1260string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001261{
1262 if ( lenp )
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001263 *lenp = Py_Size(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001264 return 1;
1265}
1266
Martin v. Löwis18e16552006-02-15 17:27:45 +00001267static Py_ssize_t
1268string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001269{
1270 if ( index != 0 ) {
1271 PyErr_SetString(PyExc_SystemError,
1272 "accessing non-existent string segment");
1273 return -1;
1274 }
1275 *ptr = self->ob_sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001276 return Py_Size(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001277}
1278
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001279static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001281 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001282 (ssizeargfunc)string_repeat, /*sq_repeat*/
1283 (ssizeargfunc)string_item, /*sq_item*/
1284 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001285 0, /*sq_ass_item*/
1286 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001287 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001288};
1289
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001290static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001291 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001292 (binaryfunc)string_subscript,
1293 0,
1294};
1295
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001296static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001297 (readbufferproc)string_buffer_getreadbuf,
1298 (writebufferproc)string_buffer_getwritebuf,
1299 (segcountproc)string_buffer_getsegcount,
1300 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001301};
1302
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303
1304
/* Which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip type i: the format string minus its three-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001313
Thomas Wouters477c8d52006-05-27 19:21:47 +00001314
/* True if the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly and only the bytes in between go through memcmp().

   Don't call if length < 2 (the middle length passed to memcmp would be
   negative).  Every argument is evaluated more than once, so arguments
   must not have side effects; each is parenthesized so that arbitrary
   expressions can be passed safely. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] && \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1320
1321
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is evaluated twice and is parenthesized so
   that any expression can be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1334
/* Append the substring data[left:right] to `list`.

   Relies on names from the expansion site: the PyObject pointers `str` and
   `list`, and a label `onError` that is jumped to when creating the
   substring or appending it fails.  The reference held in `str` is
   released on both paths once PyList_Append() has been attempted. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) == 0) {                        \
        Py_DECREF(str);                                         \
    }                                                           \
    else {                                                      \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }
1346
/* Add the substring data[left:right] as element number `count` of `list`,
   then increment `count`.

   While `count` is below MAX_PREALLOC the new string is stored straight
   into slot `count` with PyList_SET_ITEM(); after that it is appended with
   PyList_Append().  Relies on `str`, `list`, `count` and the label
   `onError` from the expansion site; `onError` is jumped to on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str) != 0) {                    \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    }                                                           \
    else {                                                      \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363
/* Always force the list to the expected size: set the list's size field to
   the `count` variable of the expansion site. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Scanning helpers.  Each one moves the index variable `i` in place.
   The forward forms stop at `len`; the reverse forms stop once `i` drops
   below zero. */
#define SKIP_SPACE(s, i, len) \
    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) \
    { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i) \
    { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i) \
    { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1371
1372Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001373split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001375 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001376 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001377 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378
1379 if (list == NULL)
1380 return NULL;
1381
Thomas Wouters477c8d52006-05-27 19:21:47 +00001382 i = j = 0;
1383
1384 while (maxsplit-- > 0) {
1385 SKIP_SPACE(s, i, len);
1386 if (i==len) break;
1387 j = i; i++;
1388 SKIP_NONSPACE(s, i, len);
1389 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001391
1392 if (i < len) {
1393 /* Only occurs when maxsplit was reached */
1394 /* Skip any remaining whitespace and copy to end of string */
1395 SKIP_SPACE(s, i, len);
1396 if (i != len)
1397 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001399 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001401 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 Py_DECREF(list);
1403 return NULL;
1404}
1405
Thomas Wouters477c8d52006-05-27 19:21:47 +00001406Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001407split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001408{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001409 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001410 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001411 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001412
1413 if (list == NULL)
1414 return NULL;
1415
Thomas Wouters477c8d52006-05-27 19:21:47 +00001416 i = j = 0;
1417 while ((j < len) && (maxcount-- > 0)) {
1418 for(; j<len; j++) {
1419 /* I found that using memchr makes no difference */
1420 if (s[j] == ch) {
1421 SPLIT_ADD(s, i, j);
1422 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001424 }
1425 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001426 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001427 if (i <= len) {
1428 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001429 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001430 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001431 return list;
1432
1433 onError:
1434 Py_DECREF(list);
1435 return NULL;
1436}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001438PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439"S.split([sep [,maxsplit]]) -> list of strings\n\
1440\n\
1441Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001442delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001443splits are done. If sep is not specified or is None, any\n\
1444whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445
1446static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001447string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001449 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001450 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001451 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001452 PyObject *list, *str, *subobj = Py_None;
1453#ifdef USE_FAST
1454 Py_ssize_t pos;
1455#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001457 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001459 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001460 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001461 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001463 if (PyString_Check(subobj)) {
1464 sub = PyString_AS_STRING(subobj);
1465 n = PyString_GET_SIZE(subobj);
1466 }
1467 else if (PyUnicode_Check(subobj))
1468 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1469 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1470 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001471
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472 if (n == 0) {
1473 PyErr_SetString(PyExc_ValueError, "empty separator");
1474 return NULL;
1475 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001476 else if (n == 1)
1477 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478
Thomas Wouters477c8d52006-05-27 19:21:47 +00001479 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001480 if (list == NULL)
1481 return NULL;
1482
Thomas Wouters477c8d52006-05-27 19:21:47 +00001483#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001485 while (maxsplit-- > 0) {
1486 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1487 if (pos < 0)
1488 break;
1489 j = i+pos;
1490 SPLIT_ADD(s, i, j);
1491 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001493#else
1494 i = j = 0;
1495 while ((j+n <= len) && (maxsplit-- > 0)) {
1496 for (; j+n <= len; j++) {
1497 if (Py_STRING_MATCH(s, j, sub, n)) {
1498 SPLIT_ADD(s, i, j);
1499 i = j = j + n;
1500 break;
1501 }
1502 }
1503 }
1504#endif
1505 SPLIT_ADD(s, i, len);
1506 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 return list;
1508
Thomas Wouters477c8d52006-05-27 19:21:47 +00001509 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510 Py_DECREF(list);
1511 return NULL;
1512}
1513
Thomas Wouters477c8d52006-05-27 19:21:47 +00001514PyDoc_STRVAR(partition__doc__,
1515"S.partition(sep) -> (head, sep, tail)\n\
1516\n\
1517Searches for the separator sep in S, and returns the part before it,\n\
1518the separator itself, and the part after it. If the separator is not\n\
1519found, returns S and two empty strings.");
1520
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001521static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001522string_partition(PyStringObject *self, PyObject *sep_obj)
1523{
1524 const char *sep;
1525 Py_ssize_t sep_len;
1526
1527 if (PyString_Check(sep_obj)) {
1528 sep = PyString_AS_STRING(sep_obj);
1529 sep_len = PyString_GET_SIZE(sep_obj);
1530 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001531 else if (PyUnicode_Check(sep_obj))
1532 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001533 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1534 return NULL;
1535
1536 return stringlib_partition(
1537 (PyObject*) self,
1538 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1539 sep_obj, sep, sep_len
1540 );
1541}
1542
1543PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001544"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001545\n\
1546Searches for the separator sep in S, starting at the end of S, and returns\n\
1547the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001548separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001549
1550static PyObject *
1551string_rpartition(PyStringObject *self, PyObject *sep_obj)
1552{
1553 const char *sep;
1554 Py_ssize_t sep_len;
1555
1556 if (PyString_Check(sep_obj)) {
1557 sep = PyString_AS_STRING(sep_obj);
1558 sep_len = PyString_GET_SIZE(sep_obj);
1559 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001560 else if (PyUnicode_Check(sep_obj))
1561 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001562 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1563 return NULL;
1564
1565 return stringlib_rpartition(
1566 (PyObject*) self,
1567 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1568 sep_obj, sep, sep_len
1569 );
1570}
1571
1572Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001573rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001574{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001575 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001576 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001577 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001578
1579 if (list == NULL)
1580 return NULL;
1581
Thomas Wouters477c8d52006-05-27 19:21:47 +00001582 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001583
Thomas Wouters477c8d52006-05-27 19:21:47 +00001584 while (maxsplit-- > 0) {
1585 RSKIP_SPACE(s, i);
1586 if (i<0) break;
1587 j = i; i--;
1588 RSKIP_NONSPACE(s, i);
1589 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001591 if (i >= 0) {
1592 /* Only occurs when maxsplit was reached */
1593 /* Skip any remaining whitespace and copy to beginning of string */
1594 RSKIP_SPACE(s, i);
1595 if (i >= 0)
1596 SPLIT_ADD(s, 0, i + 1);
1597
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001598 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001599 FIX_PREALLOC_SIZE(list);
1600 if (PyList_Reverse(list) < 0)
1601 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001602 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001603 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001604 Py_DECREF(list);
1605 return NULL;
1606}
1607
Thomas Wouters477c8d52006-05-27 19:21:47 +00001608Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001609rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001610{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001611 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001612 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001613 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001614
1615 if (list == NULL)
1616 return NULL;
1617
Thomas Wouters477c8d52006-05-27 19:21:47 +00001618 i = j = len - 1;
1619 while ((i >= 0) && (maxcount-- > 0)) {
1620 for (; i >= 0; i--) {
1621 if (s[i] == ch) {
1622 SPLIT_ADD(s, i + 1, j + 1);
1623 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001624 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001625 }
1626 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001627 }
1628 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001629 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001630 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001631 FIX_PREALLOC_SIZE(list);
1632 if (PyList_Reverse(list) < 0)
1633 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001634 return list;
1635
1636 onError:
1637 Py_DECREF(list);
1638 return NULL;
1639}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001640
1641PyDoc_STRVAR(rsplit__doc__,
1642"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1643\n\
1644Return a list of the words in the string S, using sep as the\n\
1645delimiter string, starting at the end of the string and working\n\
1646to the front. If maxsplit is given, at most maxsplit splits are\n\
1647done. If sep is not specified or is None, any whitespace string\n\
1648is a separator.");
1649
1650static PyObject *
1651string_rsplit(PyStringObject *self, PyObject *args)
1652{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001653 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001654 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001655 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001656 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001658 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659 return NULL;
1660 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001661 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001662 if (subobj == Py_None)
1663 return rsplit_whitespace(s, len, maxsplit);
1664 if (PyString_Check(subobj)) {
1665 sub = PyString_AS_STRING(subobj);
1666 n = PyString_GET_SIZE(subobj);
1667 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001668 else if (PyUnicode_Check(subobj))
1669 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001670 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1671 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001672
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001673 if (n == 0) {
1674 PyErr_SetString(PyExc_ValueError, "empty separator");
1675 return NULL;
1676 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001677 else if (n == 1)
1678 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679
Thomas Wouters477c8d52006-05-27 19:21:47 +00001680 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681 if (list == NULL)
1682 return NULL;
1683
1684 j = len;
1685 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001686
Thomas Wouters477c8d52006-05-27 19:21:47 +00001687 while ( (i >= 0) && (maxsplit-- > 0) ) {
1688 for (; i>=0; i--) {
1689 if (Py_STRING_MATCH(s, i, sub, n)) {
1690 SPLIT_ADD(s, i + n, j);
1691 j = i;
1692 i -= n;
1693 break;
1694 }
1695 }
1696 }
1697 SPLIT_ADD(s, 0, j);
1698 FIX_PREALLOC_SIZE(list);
1699 if (PyList_Reverse(list) < 0)
1700 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 return list;
1702
Thomas Wouters477c8d52006-05-27 19:21:47 +00001703onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001704 Py_DECREF(list);
1705 return NULL;
1706}
1707
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001708
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001709PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710"S.join(sequence) -> string\n\
1711\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001713sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714
1715static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001716string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717{
1718 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001719 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001722 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001723 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001724 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001725 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726
Tim Peters19fe14e2001-01-19 03:03:47 +00001727 seq = PySequence_Fast(orig, "");
1728 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001729 return NULL;
1730 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001731
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001732 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001733 if (seqlen == 0) {
1734 Py_DECREF(seq);
1735 return PyString_FromString("");
1736 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001738 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001739 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1740 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001741 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001742 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001743 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001745
Raymond Hettinger674f2412004-08-23 23:23:54 +00001746 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001747 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001748 * Do a pre-pass to figure out the total amount of space we'll
1749 * need (sz), see whether any argument is absurd, and defer to
1750 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001751 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001752 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001753 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001754 item = PySequence_Fast_GET_ITEM(seq, i);
1755 if (!PyString_Check(item)){
1756 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001757 /* Defer to Unicode join.
1758 * CAUTION: There's no gurantee that the
1759 * original sequence can be iterated over
1760 * again, so we must pass seq here.
1761 */
1762 PyObject *result;
1763 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001764 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001765 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766 }
1767 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001768 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001769 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001770 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001771 Py_DECREF(seq);
1772 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 sz += PyString_GET_SIZE(item);
1775 if (i != 0)
1776 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001777 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001778 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001779 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001780 Py_DECREF(seq);
1781 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001783 }
1784
1785 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001786 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001787 if (res == NULL) {
1788 Py_DECREF(seq);
1789 return NULL;
1790 }
1791
1792 /* Catenate everything. */
1793 p = PyString_AS_STRING(res);
1794 for (i = 0; i < seqlen; ++i) {
1795 size_t n;
1796 item = PySequence_Fast_GET_ITEM(seq, i);
1797 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001798 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 p += n;
1800 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001801 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001802 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001803 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001805
Jeremy Hylton49048292000-07-11 03:28:17 +00001806 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808}
1809
Tim Peters52e155e2001-06-16 05:42:57 +00001810PyObject *
1811_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001812{
Tim Petersa7259592001-06-16 05:11:17 +00001813 assert(sep != NULL && PyString_Check(sep));
1814 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001815 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001816}
1817
Thomas Wouters477c8d52006-05-27 19:21:47 +00001818Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001819string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001820{
1821 if (*end > len)
1822 *end = len;
1823 else if (*end < 0)
1824 *end += len;
1825 if (*end < 0)
1826 *end = 0;
1827 if (*start < 0)
1828 *start += len;
1829 if (*start < 0)
1830 *start = 0;
1831}
1832
Thomas Wouters477c8d52006-05-27 19:21:47 +00001833Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001834string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001836 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001837 const char *sub;
1838 Py_ssize_t sub_len;
1839 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840
Thomas Wouters477c8d52006-05-27 19:21:47 +00001841 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1842 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001843 return -2;
1844 if (PyString_Check(subobj)) {
1845 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001846 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001847 }
1848 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001849 return PyUnicode_Find(
1850 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001851 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001852 /* XXX - the "expected a character buffer object" is pretty
1853 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854 return -2;
1855
Thomas Wouters477c8d52006-05-27 19:21:47 +00001856 if (dir > 0)
1857 return stringlib_find_slice(
1858 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1859 sub, sub_len, start, end);
1860 else
1861 return stringlib_rfind_slice(
1862 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1863 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864}
1865
1866
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001867PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868"S.find(sub [,start [,end]]) -> int\n\
1869\n\
1870Return the lowest index in S where substring sub is found,\n\
1871such that sub is contained within s[start,end]. Optional\n\
1872arguments start and end are interpreted as in slice notation.\n\
1873\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001874Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875
1876static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001877string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001879 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880 if (result == -2)
1881 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001882 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883}
1884
1885
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001886PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887"S.index(sub [,start [,end]]) -> int\n\
1888\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001889Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890
1891static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001892string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001894 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895 if (result == -2)
1896 return NULL;
1897 if (result == -1) {
1898 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001899 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 return NULL;
1901 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001902 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903}
1904
1905
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001906PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907"S.rfind(sub [,start [,end]]) -> int\n\
1908\n\
1909Return the highest index in S where substring sub is found,\n\
1910such that sub is contained within s[start,end]. Optional\n\
1911arguments start and end are interpreted as in slice notation.\n\
1912\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001913Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914
1915static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001916string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001918 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919 if (result == -2)
1920 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001921 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922}
1923
1924
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001925PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926"S.rindex(sub [,start [,end]]) -> int\n\
1927\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001928Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929
1930static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001931string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001933 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934 if (result == -2)
1935 return NULL;
1936 if (result == -1) {
1937 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001938 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939 return NULL;
1940 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001941 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942}
1943
1944
Thomas Wouters477c8d52006-05-27 19:21:47 +00001945Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001946do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1947{
1948 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001949 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001950 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001951 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1952 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001953
1954 i = 0;
1955 if (striptype != RIGHTSTRIP) {
1956 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1957 i++;
1958 }
1959 }
1960
1961 j = len;
1962 if (striptype != LEFTSTRIP) {
1963 do {
1964 j--;
1965 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1966 j++;
1967 }
1968
1969 if (i == 0 && j == len && PyString_CheckExact(self)) {
1970 Py_INCREF(self);
1971 return (PyObject*)self;
1972 }
1973 else
1974 return PyString_FromStringAndSize(s+i, j-i);
1975}
1976
1977
Thomas Wouters477c8d52006-05-27 19:21:47 +00001978Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001979do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980{
1981 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001982 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984 i = 0;
1985 if (striptype != RIGHTSTRIP) {
1986 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1987 i++;
1988 }
1989 }
1990
1991 j = len;
1992 if (striptype != LEFTSTRIP) {
1993 do {
1994 j--;
1995 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1996 j++;
1997 }
1998
Tim Peters8fa5dd02001-09-12 02:18:30 +00001999 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000 Py_INCREF(self);
2001 return (PyObject*)self;
2002 }
2003 else
2004 return PyString_FromStringAndSize(s+i, j-i);
2005}
2006
2007
Thomas Wouters477c8d52006-05-27 19:21:47 +00002008Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002009do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2010{
2011 PyObject *sep = NULL;
2012
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002013 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002014 return NULL;
2015
2016 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002017 if (PyString_Check(sep))
2018 return do_xstrip(self, striptype, sep);
2019 else if (PyUnicode_Check(sep)) {
2020 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2021 PyObject *res;
2022 if (uniself==NULL)
2023 return NULL;
2024 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2025 striptype, sep);
2026 Py_DECREF(uniself);
2027 return res;
2028 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002029 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002030 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002031 STRIPNAME(striptype));
2032 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002033 }
2034
2035 return do_strip(self, striptype);
2036}
2037
2038
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002039PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002040"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041\n\
2042Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002043whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002044If chars is given and not None, remove characters in chars instead.\n\
2045If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046
2047static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002048string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002050 if (PyTuple_GET_SIZE(args) == 0)
2051 return do_strip(self, BOTHSTRIP); /* Common case */
2052 else
2053 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054}
2055
2056
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002057PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002058"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002059\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002060Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002061If chars is given and not None, remove characters in chars instead.\n\
2062If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063
2064static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002065string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002067 if (PyTuple_GET_SIZE(args) == 0)
2068 return do_strip(self, LEFTSTRIP); /* Common case */
2069 else
2070 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071}
2072
2073
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002074PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002075"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002077Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002078If chars is given and not None, remove characters in chars instead.\n\
2079If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080
2081static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002082string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002084 if (PyTuple_GET_SIZE(args) == 0)
2085 return do_strip(self, RIGHTSTRIP); /* Common case */
2086 else
2087 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088}
2089
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092"S.lower() -> string\n\
2093\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002094Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095
Thomas Wouters477c8d52006-05-27 19:21:47 +00002096/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2097#ifndef _tolower
2098#define _tolower tolower
2099#endif
2100
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002102string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002104 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002105 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002106 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002108 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002109 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002111
2112 s = PyString_AS_STRING(newobj);
2113
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002114 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002115
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002117 int c = Py_CHARMASK(s[i]);
2118 if (isupper(c))
2119 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002121
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002122 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123}
2124
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002125PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126"S.upper() -> string\n\
2127\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129
Thomas Wouters477c8d52006-05-27 19:21:47 +00002130#ifndef _toupper
2131#define _toupper toupper
2132#endif
2133
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002135string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002137 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002138 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002139 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002141 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002144
2145 s = PyString_AS_STRING(newobj);
2146
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002147 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002148
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002150 int c = Py_CHARMASK(s[i]);
2151 if (islower(c))
2152 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002154
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002155 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156}
2157
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002158PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002159"S.title() -> string\n\
2160\n\
2161Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002162characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163
2164static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002165string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002166{
2167 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002168 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002170 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002172 newobj = PyString_FromStringAndSize(NULL, n);
2173 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002175 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 for (i = 0; i < n; i++) {
2177 int c = Py_CHARMASK(*s++);
2178 if (islower(c)) {
2179 if (!previous_is_cased)
2180 c = toupper(c);
2181 previous_is_cased = 1;
2182 } else if (isupper(c)) {
2183 if (previous_is_cased)
2184 c = tolower(c);
2185 previous_is_cased = 1;
2186 } else
2187 previous_is_cased = 0;
2188 *s_new++ = c;
2189 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002190 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191}
2192
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002193PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194"S.capitalize() -> string\n\
2195\n\
2196Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002197capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198
2199static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002200string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002201{
2202 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002203 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002204 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002206 newobj = PyString_FromStringAndSize(NULL, n);
2207 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002209 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210 if (0 < n) {
2211 int c = Py_CHARMASK(*s++);
2212 if (islower(c))
2213 *s_new = toupper(c);
2214 else
2215 *s_new = c;
2216 s_new++;
2217 }
2218 for (i = 1; i < n; i++) {
2219 int c = Py_CHARMASK(*s++);
2220 if (isupper(c))
2221 *s_new = tolower(c);
2222 else
2223 *s_new = c;
2224 s_new++;
2225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002226 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227}
2228
2229
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002230PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231"S.count(sub[, start[, end]]) -> int\n\
2232\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002233Return the number of non-overlapping occurrences of substring sub in\n\
2234string S[start:end]. Optional arguments start and end are interpreted\n\
2235as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
2237static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002238string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002240 PyObject *sub_obj;
2241 const char *str = PyString_AS_STRING(self), *sub;
2242 Py_ssize_t sub_len;
2243 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244
Thomas Wouters477c8d52006-05-27 19:21:47 +00002245 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2246 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002248
Thomas Wouters477c8d52006-05-27 19:21:47 +00002249 if (PyString_Check(sub_obj)) {
2250 sub = PyString_AS_STRING(sub_obj);
2251 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002252 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002253 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002254 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002255 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002256 if (count == -1)
2257 return NULL;
2258 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002259 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002260 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002261 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 return NULL;
2263
Thomas Wouters477c8d52006-05-27 19:21:47 +00002264 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002265
Thomas Wouters477c8d52006-05-27 19:21:47 +00002266 return PyInt_FromSsize_t(
2267 stringlib_count(str + start, end - start, sub, sub_len)
2268 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269}
2270
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002271PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272"S.swapcase() -> string\n\
2273\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002275converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002276
2277static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002278string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002279{
2280 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002281 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002282 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002283
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002284 newobj = PyString_FromStringAndSize(NULL, n);
2285 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002287 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288 for (i = 0; i < n; i++) {
2289 int c = Py_CHARMASK(*s++);
2290 if (islower(c)) {
2291 *s_new = toupper(c);
2292 }
2293 else if (isupper(c)) {
2294 *s_new = tolower(c);
2295 }
2296 else
2297 *s_new = c;
2298 s_new++;
2299 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002300 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301}
2302
2303
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002304PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305"S.translate(table [,deletechars]) -> string\n\
2306\n\
2307Return a copy of the string S, where all characters occurring\n\
2308in the optional argument deletechars are removed, and the\n\
2309remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002310translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311
2312static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002313string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002316 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002317 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002319 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002320 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 PyObject *result;
2322 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002323 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002325 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328
2329 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002330 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331 tablen = PyString_GET_SIZE(tableobj);
2332 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002333 else if (tableobj == Py_None) {
2334 table = NULL;
2335 tablen = 256;
2336 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002338 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 parameter; instead a mapping to None will cause characters
2340 to be deleted. */
2341 if (delobj != NULL) {
2342 PyErr_SetString(PyExc_TypeError,
2343 "deletions are implemented differently for unicode");
2344 return NULL;
2345 }
2346 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2347 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002348 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350
Martin v. Löwis00b61272002-12-12 20:03:19 +00002351 if (tablen != 256) {
2352 PyErr_SetString(PyExc_ValueError,
2353 "translation table must be 256 characters long");
2354 return NULL;
2355 }
2356
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 if (delobj != NULL) {
2358 if (PyString_Check(delobj)) {
2359 del_table = PyString_AS_STRING(delobj);
2360 dellen = PyString_GET_SIZE(delobj);
2361 }
2362 else if (PyUnicode_Check(delobj)) {
2363 PyErr_SetString(PyExc_TypeError,
2364 "deletions are implemented differently for unicode");
2365 return NULL;
2366 }
2367 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2368 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 }
2370 else {
2371 del_table = NULL;
2372 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373 }
2374
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002375 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376 result = PyString_FromStringAndSize((char *)NULL, inlen);
2377 if (result == NULL)
2378 return NULL;
2379 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002380 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381
Guido van Rossumd8faa362007-04-27 19:54:29 +00002382 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383 /* If no deletions are required, use faster code */
2384 for (i = inlen; --i >= 0; ) {
2385 c = Py_CHARMASK(*input++);
2386 if (Py_CHARMASK((*output++ = table[c])) != c)
2387 changed = 1;
2388 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002389 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390 return result;
2391 Py_DECREF(result);
2392 Py_INCREF(input_obj);
2393 return input_obj;
2394 }
2395
Guido van Rossumd8faa362007-04-27 19:54:29 +00002396 if (table == NULL) {
2397 for (i = 0; i < 256; i++)
2398 trans_table[i] = Py_CHARMASK(i);
2399 } else {
2400 for (i = 0; i < 256; i++)
2401 trans_table[i] = Py_CHARMASK(table[i]);
2402 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403
2404 for (i = 0; i < dellen; i++)
2405 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2406
2407 for (i = inlen; --i >= 0; ) {
2408 c = Py_CHARMASK(*input++);
2409 if (trans_table[c] != -1)
2410 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2411 continue;
2412 changed = 1;
2413 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002414 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415 Py_DECREF(result);
2416 Py_INCREF(input_obj);
2417 return input_obj;
2418 }
2419 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002420 if (inlen > 0)
2421 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 return result;
2423}
2424
2425
/* Search directions accepted by findstring() and countstring().
   REVERSE is parenthesized so the negative constant stays a single
   operand wherever the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first occurrence of byte `c` within the first
   `target_len` bytes of `target`, or NULL if it does not occur. */
#define findchar(target, target_len, c)				\
  ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433
Thomas Wouters477c8d52006-05-27 19:21:47 +00002434/* String ops must return a string. */
2435/* If the object is subclass of string, create a copy */
2436Py_LOCAL(PyStringObject *)
2437return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002439 if (PyString_CheckExact(self)) {
2440 Py_INCREF(self);
2441 return self;
2442 }
2443 return (PyStringObject *)PyString_FromStringAndSize(
2444 PyString_AS_STRING(self),
2445 PyString_GET_SIZE(self));
2446}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002447
Thomas Wouters477c8d52006-05-27 19:21:47 +00002448Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002449countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002450{
2451 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002452 const char *start=target;
2453 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002454
Thomas Wouters477c8d52006-05-27 19:21:47 +00002455 while ( (start=findchar(start, end-start, c)) != NULL ) {
2456 count++;
2457 if (count >= maxcount)
2458 break;
2459 start += 1;
2460 }
2461 return count;
2462}
2463
2464Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002465findstring(const char *target, Py_ssize_t target_len,
2466 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002467 Py_ssize_t start,
2468 Py_ssize_t end,
2469 int direction)
2470{
2471 if (start < 0) {
2472 start += target_len;
2473 if (start < 0)
2474 start = 0;
2475 }
2476 if (end > target_len) {
2477 end = target_len;
2478 } else if (end < 0) {
2479 end += target_len;
2480 if (end < 0)
2481 end = 0;
2482 }
2483
2484 /* zero-length substrings always match at the first attempt */
2485 if (pattern_len == 0)
2486 return (direction > 0) ? start : end;
2487
2488 end -= pattern_len;
2489
2490 if (direction < 0) {
2491 for (; end >= start; end--)
2492 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2493 return end;
2494 } else {
2495 for (; start <= end; start++)
2496 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2497 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002498 }
2499 return -1;
2500}
2501
Thomas Wouters477c8d52006-05-27 19:21:47 +00002502Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002503countstring(const char *target, Py_ssize_t target_len,
2504 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002505 Py_ssize_t start,
2506 Py_ssize_t end,
2507 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002509 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002510
Thomas Wouters477c8d52006-05-27 19:21:47 +00002511 if (start < 0) {
2512 start += target_len;
2513 if (start < 0)
2514 start = 0;
2515 }
2516 if (end > target_len) {
2517 end = target_len;
2518 } else if (end < 0) {
2519 end += target_len;
2520 if (end < 0)
2521 end = 0;
2522 }
2523
2524 /* zero-length substrings match everywhere */
2525 if (pattern_len == 0 || maxcount == 0) {
2526 if (target_len+1 < maxcount)
2527 return target_len+1;
2528 return maxcount;
2529 }
2530
2531 end -= pattern_len;
2532 if (direction < 0) {
2533 for (; (end >= start); end--)
2534 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2535 count++;
2536 if (--maxcount <= 0) break;
2537 end -= pattern_len-1;
2538 }
2539 } else {
2540 for (; (start <= end); start++)
2541 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2542 count++;
2543 if (--maxcount <= 0)
2544 break;
2545 start += pattern_len-1;
2546 }
2547 }
2548 return count;
2549}
2550
2551
2552/* Algorithms for different cases of string replacement */
2553
2554/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2555Py_LOCAL(PyStringObject *)
2556replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002557 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002558 Py_ssize_t maxcount)
2559{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002560 char *self_s, *result_s;
2561 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002562 Py_ssize_t count, i, product;
2563 PyStringObject *result;
2564
2565 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002566
Thomas Wouters477c8d52006-05-27 19:21:47 +00002567 /* 1 at the end plus 1 after every character */
2568 count = self_len+1;
2569 if (maxcount < count)
2570 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002571
Thomas Wouters477c8d52006-05-27 19:21:47 +00002572 /* Check for overflow */
2573 /* result_len = count * to_len + self_len; */
2574 product = count * to_len;
2575 if (product / to_len != count) {
2576 PyErr_SetString(PyExc_OverflowError,
2577 "replace string is too long");
2578 return NULL;
2579 }
2580 result_len = product + self_len;
2581 if (result_len < 0) {
2582 PyErr_SetString(PyExc_OverflowError,
2583 "replace string is too long");
2584 return NULL;
2585 }
2586
2587 if (! (result = (PyStringObject *)
2588 PyString_FromStringAndSize(NULL, result_len)) )
2589 return NULL;
2590
2591 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002592 result_s = PyString_AS_STRING(result);
2593
2594 /* TODO: special case single character, which doesn't need memcpy */
2595
2596 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002597 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002598 result_s += to_len;
2599 count -= 1;
2600
2601 for (i=0; i<count; i++) {
2602 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002603 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002604 result_s += to_len;
2605 }
2606
2607 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002608 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002609
2610 return result;
2611}
2612
2613/* Special case for deleting a single character */
2614/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2615Py_LOCAL(PyStringObject *)
2616replace_delete_single_character(PyStringObject *self,
2617 char from_c, Py_ssize_t maxcount)
2618{
2619 char *self_s, *result_s;
2620 char *start, *next, *end;
2621 Py_ssize_t self_len, result_len;
2622 Py_ssize_t count;
2623 PyStringObject *result;
2624
2625 self_len = PyString_GET_SIZE(self);
2626 self_s = PyString_AS_STRING(self);
2627
2628 count = countchar(self_s, self_len, from_c, maxcount);
2629 if (count == 0) {
2630 return return_self(self);
2631 }
2632
2633 result_len = self_len - count; /* from_len == 1 */
2634 assert(result_len>=0);
2635
2636 if ( (result = (PyStringObject *)
2637 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2638 return NULL;
2639 result_s = PyString_AS_STRING(result);
2640
2641 start = self_s;
2642 end = self_s + self_len;
2643 while (count-- > 0) {
2644 next = findchar(start, end-start, from_c);
2645 if (next == NULL)
2646 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002647 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002648 result_s += (next-start);
2649 start = next+1;
2650 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002651 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002652
Thomas Wouters477c8d52006-05-27 19:21:47 +00002653 return result;
2654}
2655
2656/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2657
2658Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002659replace_delete_substring(PyStringObject *self,
2660 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002661 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002662 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002663 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002664 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002665 Py_ssize_t count, offset;
2666 PyStringObject *result;
2667
2668 self_len = PyString_GET_SIZE(self);
2669 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002670
2671 count = countstring(self_s, self_len,
2672 from_s, from_len,
2673 0, self_len, 1,
2674 maxcount);
2675
2676 if (count == 0) {
2677 /* no matches */
2678 return return_self(self);
2679 }
2680
2681 result_len = self_len - (count * from_len);
2682 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002683
Thomas Wouters477c8d52006-05-27 19:21:47 +00002684 if ( (result = (PyStringObject *)
2685 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2686 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002687
Thomas Wouters477c8d52006-05-27 19:21:47 +00002688 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002689
Thomas Wouters477c8d52006-05-27 19:21:47 +00002690 start = self_s;
2691 end = self_s + self_len;
2692 while (count-- > 0) {
2693 offset = findstring(start, end-start,
2694 from_s, from_len,
2695 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002696 if (offset == -1)
2697 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002698 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002699
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002700 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002701
Thomas Wouters477c8d52006-05-27 19:21:47 +00002702 result_s += (next-start);
2703 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002704 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002705 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002706 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002707}
2708
Thomas Wouters477c8d52006-05-27 19:21:47 +00002709/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2710Py_LOCAL(PyStringObject *)
2711replace_single_character_in_place(PyStringObject *self,
2712 char from_c, char to_c,
2713 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002714{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002715 char *self_s, *result_s, *start, *end, *next;
2716 Py_ssize_t self_len;
2717 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002718
Thomas Wouters477c8d52006-05-27 19:21:47 +00002719 /* The result string will be the same size */
2720 self_s = PyString_AS_STRING(self);
2721 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002722
Thomas Wouters477c8d52006-05-27 19:21:47 +00002723 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002724
Thomas Wouters477c8d52006-05-27 19:21:47 +00002725 if (next == NULL) {
2726 /* No matches; return the original string */
2727 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002728 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002729
Thomas Wouters477c8d52006-05-27 19:21:47 +00002730 /* Need to make a new string */
2731 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2732 if (result == NULL)
2733 return NULL;
2734 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002735 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002736
Thomas Wouters477c8d52006-05-27 19:21:47 +00002737 /* change everything in-place, starting with this one */
2738 start = result_s + (next-self_s);
2739 *start = to_c;
2740 start++;
2741 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002742
Thomas Wouters477c8d52006-05-27 19:21:47 +00002743 while (--maxcount > 0) {
2744 next = findchar(start, end-start, from_c);
2745 if (next == NULL)
2746 break;
2747 *next = to_c;
2748 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002749 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002750
Thomas Wouters477c8d52006-05-27 19:21:47 +00002751 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002752}
2753
Thomas Wouters477c8d52006-05-27 19:21:47 +00002754/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2755Py_LOCAL(PyStringObject *)
2756replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002757 const char *from_s, Py_ssize_t from_len,
2758 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002759 Py_ssize_t maxcount)
2760{
2761 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002762 char *self_s;
2763 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002764 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002765
Thomas Wouters477c8d52006-05-27 19:21:47 +00002766 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002767
Thomas Wouters477c8d52006-05-27 19:21:47 +00002768 self_s = PyString_AS_STRING(self);
2769 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002770
Thomas Wouters477c8d52006-05-27 19:21:47 +00002771 offset = findstring(self_s, self_len,
2772 from_s, from_len,
2773 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002774 if (offset == -1) {
2775 /* No matches; return the original string */
2776 return return_self(self);
2777 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002778
Thomas Wouters477c8d52006-05-27 19:21:47 +00002779 /* Need to make a new string */
2780 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2781 if (result == NULL)
2782 return NULL;
2783 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002784 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002785
Thomas Wouters477c8d52006-05-27 19:21:47 +00002786 /* change everything in-place, starting with this one */
2787 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002788 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002789 start += from_len;
2790 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002791
Thomas Wouters477c8d52006-05-27 19:21:47 +00002792 while ( --maxcount > 0) {
2793 offset = findstring(start, end-start,
2794 from_s, from_len,
2795 0, end-start, FORWARD);
2796 if (offset==-1)
2797 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002798 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002799 start += offset+from_len;
2800 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002801
Thomas Wouters477c8d52006-05-27 19:21:47 +00002802 return result;
2803}
2804
2805/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2806Py_LOCAL(PyStringObject *)
2807replace_single_character(PyStringObject *self,
2808 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002809 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002810 Py_ssize_t maxcount)
2811{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002812 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002813 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002814 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002815 Py_ssize_t count, product;
2816 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002817
Thomas Wouters477c8d52006-05-27 19:21:47 +00002818 self_s = PyString_AS_STRING(self);
2819 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002820
Thomas Wouters477c8d52006-05-27 19:21:47 +00002821 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002822 if (count == 0) {
2823 /* no matches, return unchanged */
2824 return return_self(self);
2825 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002826
Thomas Wouters477c8d52006-05-27 19:21:47 +00002827 /* use the difference between current and new, hence the "-1" */
2828 /* result_len = self_len + count * (to_len-1) */
2829 product = count * (to_len-1);
2830 if (product / (to_len-1) != count) {
2831 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2832 return NULL;
2833 }
2834 result_len = self_len + product;
2835 if (result_len < 0) {
2836 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2837 return NULL;
2838 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002839
Thomas Wouters477c8d52006-05-27 19:21:47 +00002840 if ( (result = (PyStringObject *)
2841 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2842 return NULL;
2843 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002844
Thomas Wouters477c8d52006-05-27 19:21:47 +00002845 start = self_s;
2846 end = self_s + self_len;
2847 while (count-- > 0) {
2848 next = findchar(start, end-start, from_c);
2849 if (next == NULL)
2850 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002851
Thomas Wouters477c8d52006-05-27 19:21:47 +00002852 if (next == start) {
2853 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002854 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002855 result_s += to_len;
2856 start += 1;
2857 } else {
2858 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002859 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002860 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002861 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002862 result_s += to_len;
2863 start = next+1;
2864 }
2865 }
2866 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002867 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002868
Thomas Wouters477c8d52006-05-27 19:21:47 +00002869 return result;
2870}
2871
2872/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2873Py_LOCAL(PyStringObject *)
2874replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002875 const char *from_s, Py_ssize_t from_len,
2876 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002877 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002878 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002879 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002880 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002881 Py_ssize_t count, offset, product;
2882 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002883
Thomas Wouters477c8d52006-05-27 19:21:47 +00002884 self_s = PyString_AS_STRING(self);
2885 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002886
Thomas Wouters477c8d52006-05-27 19:21:47 +00002887 count = countstring(self_s, self_len,
2888 from_s, from_len,
2889 0, self_len, FORWARD, maxcount);
2890 if (count == 0) {
2891 /* no matches, return unchanged */
2892 return return_self(self);
2893 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002894
Thomas Wouters477c8d52006-05-27 19:21:47 +00002895 /* Check for overflow */
2896 /* result_len = self_len + count * (to_len-from_len) */
2897 product = count * (to_len-from_len);
2898 if (product / (to_len-from_len) != count) {
2899 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2900 return NULL;
2901 }
2902 result_len = self_len + product;
2903 if (result_len < 0) {
2904 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2905 return NULL;
2906 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002907
Thomas Wouters477c8d52006-05-27 19:21:47 +00002908 if ( (result = (PyStringObject *)
2909 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2910 return NULL;
2911 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002912
Thomas Wouters477c8d52006-05-27 19:21:47 +00002913 start = self_s;
2914 end = self_s + self_len;
2915 while (count-- > 0) {
2916 offset = findstring(start, end-start,
2917 from_s, from_len,
2918 0, end-start, FORWARD);
2919 if (offset == -1)
2920 break;
2921 next = start+offset;
2922 if (next == start) {
2923 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002924 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002925 result_s += to_len;
2926 start += from_len;
2927 } else {
2928 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002929 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002930 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002931 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002932 result_s += to_len;
2933 start = next+from_len;
2934 }
2935 }
2936 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002937 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002938
Thomas Wouters477c8d52006-05-27 19:21:47 +00002939 return result;
2940}
2941
2942
2943Py_LOCAL(PyStringObject *)
2944replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002945 const char *from_s, Py_ssize_t from_len,
2946 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002947 Py_ssize_t maxcount)
2948{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002949 if (maxcount < 0) {
2950 maxcount = PY_SSIZE_T_MAX;
2951 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2952 /* nothing to do; return the original string */
2953 return return_self(self);
2954 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002955
Thomas Wouters477c8d52006-05-27 19:21:47 +00002956 if (maxcount == 0 ||
2957 (from_len == 0 && to_len == 0)) {
2958 /* nothing to do; return the original string */
2959 return return_self(self);
2960 }
2961
2962 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002963
Thomas Wouters477c8d52006-05-27 19:21:47 +00002964 if (from_len == 0) {
2965 /* insert the 'to' string everywhere. */
2966 /* >>> "Python".replace("", ".") */
2967 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002968 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002969 }
2970
2971 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2972 /* point for an empty self string to generate a non-empty string */
2973 /* Special case so the remaining code always gets a non-empty string */
2974 if (PyString_GET_SIZE(self) == 0) {
2975 return return_self(self);
2976 }
2977
2978 if (to_len == 0) {
2979 /* delete all occurances of 'from' string */
2980 if (from_len == 1) {
2981 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002982 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002983 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002984 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002985 }
2986 }
2987
2988 /* Handle special case where both strings have the same length */
2989
2990 if (from_len == to_len) {
2991 if (from_len == 1) {
2992 return replace_single_character_in_place(
2993 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002994 from_s[0],
2995 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002996 maxcount);
2997 } else {
2998 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002999 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003000 }
3001 }
3002
3003 /* Otherwise use the more generic algorithms */
3004 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003005 return replace_single_character(self, from_s[0],
3006 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003007 } else {
3008 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003009 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003010 }
3011}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003012
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003013PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003014"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003015\n\
3016Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003017old replaced by new. If the optional argument count is\n\
3018given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003019
3020static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003021string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003022{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003023 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003024 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003025 const char *from_s, *to_s;
3026 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003027
Thomas Wouters477c8d52006-05-27 19:21:47 +00003028 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003029 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003030
Thomas Wouters477c8d52006-05-27 19:21:47 +00003031 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003032 from_s = PyString_AS_STRING(from);
3033 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003034 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003035 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003036 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003037 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003038 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003039 return NULL;
3040
Thomas Wouters477c8d52006-05-27 19:21:47 +00003041 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003042 to_s = PyString_AS_STRING(to);
3043 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003044 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003045 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003046 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003047 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003048 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003049 return NULL;
3050
Thomas Wouters477c8d52006-05-27 19:21:47 +00003051 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003052 from_s, from_len,
3053 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054}
3055
Thomas Wouters477c8d52006-05-27 19:21:47 +00003056/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003057
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003058/* Matches the end (direction >= 0) or start (direction < 0) of self
3059 * against substr, using the start and end arguments. Returns
3060 * -1 on error, 0 if not found and 1 if found.
3061 */
3062Py_LOCAL(int)
3063_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3064 Py_ssize_t end, int direction)
3065{
3066 Py_ssize_t len = PyString_GET_SIZE(self);
3067 Py_ssize_t slen;
3068 const char* sub;
3069 const char* str;
3070
3071 if (PyString_Check(substr)) {
3072 sub = PyString_AS_STRING(substr);
3073 slen = PyString_GET_SIZE(substr);
3074 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003075 else if (PyUnicode_Check(substr))
3076 return PyUnicode_Tailmatch((PyObject *)self,
3077 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003078 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3079 return -1;
3080 str = PyString_AS_STRING(self);
3081
3082 string_adjust_indices(&start, &end, len);
3083
3084 if (direction < 0) {
3085 /* startswith */
3086 if (start+slen > len)
3087 return 0;
3088 } else {
3089 /* endswith */
3090 if (end-start < slen || start > len)
3091 return 0;
3092
3093 if (end-slen > start)
3094 start = end - slen;
3095 }
3096 if (end-start >= slen)
3097 return ! memcmp(str+start, sub, slen);
3098 return 0;
3099}
3100
3101
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003102PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003103"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003104\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003105Return True if S starts with the specified prefix, False otherwise.\n\
3106With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003107With optional end, stop comparing S at that position.\n\
3108prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003109
3110static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003111string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003112{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003113 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003114 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003116 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003117
Guido van Rossumc6821402000-05-08 14:08:05 +00003118 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3119 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003120 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003121 if (PyTuple_Check(subobj)) {
3122 Py_ssize_t i;
3123 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3124 result = _string_tailmatch(self,
3125 PyTuple_GET_ITEM(subobj, i),
3126 start, end, -1);
3127 if (result == -1)
3128 return NULL;
3129 else if (result) {
3130 Py_RETURN_TRUE;
3131 }
3132 }
3133 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003134 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003135 result = _string_tailmatch(self, subobj, start, end, -1);
3136 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003138 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003139 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003140}
3141
3142
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003143PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003144"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003145\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003146Return True if S ends with the specified suffix, False otherwise.\n\
3147With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003148With optional end, stop comparing S at that position.\n\
3149suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150
3151static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003152string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003153{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003154 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003155 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003156 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003157 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158
Guido van Rossumc6821402000-05-08 14:08:05 +00003159 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3160 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003162 if (PyTuple_Check(subobj)) {
3163 Py_ssize_t i;
3164 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3165 result = _string_tailmatch(self,
3166 PyTuple_GET_ITEM(subobj, i),
3167 start, end, +1);
3168 if (result == -1)
3169 return NULL;
3170 else if (result) {
3171 Py_RETURN_TRUE;
3172 }
3173 }
3174 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003175 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003176 result = _string_tailmatch(self, subobj, start, end, +1);
3177 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003178 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003179 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003180 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003181}
3182
3183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003184PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003185"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003186\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003187Encodes S using the codec registered for encoding. encoding defaults\n\
3188to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003189handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003190a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3191'xmlcharrefreplace' as well as any other name registered with\n\
3192codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003193
3194static PyObject *
3195string_encode(PyStringObject *self, PyObject *args)
3196{
3197 char *encoding = NULL;
3198 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003199 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003200
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003201 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3202 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003203 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003204 if (v == NULL)
3205 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003206 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003207 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003208 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003209 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003210 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003211 Py_DECREF(v);
3212 return NULL;
3213 }
3214 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003215
3216 onError:
3217 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003218}
3219
3220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003221PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003222"S.decode([encoding[,errors]]) -> object\n\
3223\n\
3224Decodes S using the codec registered for encoding. encoding defaults\n\
3225to the default encoding. errors may be given to set a different error\n\
3226handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003227a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3228as well as any other name registerd with codecs.register_error that is\n\
3229able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003230
3231static PyObject *
3232string_decode(PyStringObject *self, PyObject *args)
3233{
3234 char *encoding = NULL;
3235 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003236 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003237
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003238 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3239 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003240 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003241 if (v == NULL)
3242 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003243 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3244 PyErr_Format(PyExc_TypeError,
3245 "decoder did not return a string/unicode object "
3246 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003247 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003248 Py_DECREF(v);
3249 return NULL;
3250 }
3251 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003252
3253 onError:
3254 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003255}
3256
3257
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003258PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003259"S.expandtabs([tabsize]) -> string\n\
3260\n\
3261Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003262If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003263
3264static PyObject*
3265string_expandtabs(PyStringObject *self, PyObject *args)
3266{
3267 const char *e, *p;
3268 char *q;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003269 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270 PyObject *u;
3271 int tabsize = 8;
3272
3273 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3274 return NULL;
3275
Thomas Wouters7e474022000-07-16 12:04:32 +00003276 /* First pass: determine size of output string */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003277 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003278 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3279 for (p = PyString_AS_STRING(self); p < e; p++)
3280 if (*p == '\t') {
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003281 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003282 j += tabsize - (j % tabsize);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003283 if (old_j > j) {
3284 PyErr_SetString(PyExc_OverflowError,
3285 "new string is too long");
3286 return NULL;
3287 }
3288 old_j = j;
3289 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003290 }
3291 else {
3292 j++;
3293 if (*p == '\n' || *p == '\r') {
3294 i += j;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003295 old_j = j = 0;
3296 if (i < 0) {
3297 PyErr_SetString(PyExc_OverflowError,
3298 "new string is too long");
3299 return NULL;
3300 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003301 }
3302 }
3303
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003304 if ((i + j) < 0) {
3305 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3306 return NULL;
3307 }
3308
Guido van Rossum4c08d552000-03-10 22:55:18 +00003309 /* Second pass: create output string and fill it */
3310 u = PyString_FromStringAndSize(NULL, i + j);
3311 if (!u)
3312 return NULL;
3313
3314 j = 0;
3315 q = PyString_AS_STRING(u);
3316
3317 for (p = PyString_AS_STRING(self); p < e; p++)
3318 if (*p == '\t') {
3319 if (tabsize > 0) {
3320 i = tabsize - (j % tabsize);
3321 j += i;
3322 while (i--)
3323 *q++ = ' ';
3324 }
3325 }
3326 else {
3327 j++;
3328 *q++ = *p;
3329 if (*p == '\n' || *p == '\r')
3330 j = 0;
3331 }
3332
3333 return u;
3334}
3335
Thomas Wouters477c8d52006-05-27 19:21:47 +00003336Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003337pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003338{
3339 PyObject *u;
3340
3341 if (left < 0)
3342 left = 0;
3343 if (right < 0)
3344 right = 0;
3345
Tim Peters8fa5dd02001-09-12 02:18:30 +00003346 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003347 Py_INCREF(self);
3348 return (PyObject *)self;
3349 }
3350
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003351 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003352 left + PyString_GET_SIZE(self) + right);
3353 if (u) {
3354 if (left)
3355 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003356 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003357 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003358 PyString_GET_SIZE(self));
3359 if (right)
3360 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3361 fill, right);
3362 }
3363
3364 return u;
3365}
3366
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003367PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003368"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003369"\n"
3370"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003371"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003372
3373static PyObject *
3374string_ljust(PyStringObject *self, PyObject *args)
3375{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003376 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003377 char fillchar = ' ';
3378
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003379 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003380 return NULL;
3381
Tim Peters8fa5dd02001-09-12 02:18:30 +00003382 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003383 Py_INCREF(self);
3384 return (PyObject*) self;
3385 }
3386
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003387 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003388}
3389
3390
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003391PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003392"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003393"\n"
3394"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003395"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003396
3397static PyObject *
3398string_rjust(PyStringObject *self, PyObject *args)
3399{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003400 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003401 char fillchar = ' ';
3402
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003403 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003404 return NULL;
3405
Tim Peters8fa5dd02001-09-12 02:18:30 +00003406 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003407 Py_INCREF(self);
3408 return (PyObject*) self;
3409 }
3410
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003411 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412}
3413
3414
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003415PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003416"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003417"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003418"Return S centered in a string of length width. Padding is\n"
3419"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003420
3421static PyObject *
3422string_center(PyStringObject *self, PyObject *args)
3423{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003424 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003425 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003426 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003427
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003428 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003429 return NULL;
3430
Tim Peters8fa5dd02001-09-12 02:18:30 +00003431 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003432 Py_INCREF(self);
3433 return (PyObject*) self;
3434 }
3435
3436 marg = width - PyString_GET_SIZE(self);
3437 left = marg / 2 + (marg & width & 1);
3438
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003439 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003440}
3441
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003442PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003443"S.zfill(width) -> string\n"
3444"\n"
3445"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003446"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003447
3448static PyObject *
3449string_zfill(PyStringObject *self, PyObject *args)
3450{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003451 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003452 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003453 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003454 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003455
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003456 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003457 return NULL;
3458
3459 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003460 if (PyString_CheckExact(self)) {
3461 Py_INCREF(self);
3462 return (PyObject*) self;
3463 }
3464 else
3465 return PyString_FromStringAndSize(
3466 PyString_AS_STRING(self),
3467 PyString_GET_SIZE(self)
3468 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003469 }
3470
3471 fill = width - PyString_GET_SIZE(self);
3472
3473 s = pad(self, fill, 0, '0');
3474
3475 if (s == NULL)
3476 return NULL;
3477
3478 p = PyString_AS_STRING(s);
3479 if (p[fill] == '+' || p[fill] == '-') {
3480 /* move sign to beginning of string */
3481 p[0] = p[fill];
3482 p[fill] = '0';
3483 }
3484
3485 return (PyObject*) s;
3486}
3487
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003488PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003489"S.isspace() -> bool\n\
3490\n\
3491Return True if all characters in S are whitespace\n\
3492and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493
3494static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003495string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003496{
Fred Drakeba096332000-07-09 07:04:36 +00003497 register const unsigned char *p
3498 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003499 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003500
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501 /* Shortcut for single character strings */
3502 if (PyString_GET_SIZE(self) == 1 &&
3503 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003504 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003506 /* Special case for empty strings */
3507 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003508 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003509
Guido van Rossum4c08d552000-03-10 22:55:18 +00003510 e = p + PyString_GET_SIZE(self);
3511 for (; p < e; p++) {
3512 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003513 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003514 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003515 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003516}
3517
3518
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003519PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003520"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003521\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003522Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003523and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003524
3525static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003526string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003527{
Fred Drakeba096332000-07-09 07:04:36 +00003528 register const unsigned char *p
3529 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003530 register const unsigned char *e;
3531
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003532 /* Shortcut for single character strings */
3533 if (PyString_GET_SIZE(self) == 1 &&
3534 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003535 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003536
3537 /* Special case for empty strings */
3538 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003539 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003540
3541 e = p + PyString_GET_SIZE(self);
3542 for (; p < e; p++) {
3543 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003544 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003545 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003546 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003547}
3548
3549
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003550PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003551"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003552\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003553Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003554and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003555
3556static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003557string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558{
Fred Drakeba096332000-07-09 07:04:36 +00003559 register const unsigned char *p
3560 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561 register const unsigned char *e;
3562
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003563 /* Shortcut for single character strings */
3564 if (PyString_GET_SIZE(self) == 1 &&
3565 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003566 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003567
3568 /* Special case for empty strings */
3569 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003570 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571
3572 e = p + PyString_GET_SIZE(self);
3573 for (; p < e; p++) {
3574 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003575 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003578}
3579
3580
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003581PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003583\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003584Return True if all characters in S are digits\n\
3585and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586
3587static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003588string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589{
Fred Drakeba096332000-07-09 07:04:36 +00003590 register const unsigned char *p
3591 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003592 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003593
Guido van Rossum4c08d552000-03-10 22:55:18 +00003594 /* Shortcut for single character strings */
3595 if (PyString_GET_SIZE(self) == 1 &&
3596 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003597 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003599 /* Special case for empty strings */
3600 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003601 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003602
Guido van Rossum4c08d552000-03-10 22:55:18 +00003603 e = p + PyString_GET_SIZE(self);
3604 for (; p < e; p++) {
3605 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003606 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003607 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609}
3610
3611
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003612PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003614\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003615Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003616at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617
3618static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003619string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620{
Fred Drakeba096332000-07-09 07:04:36 +00003621 register const unsigned char *p
3622 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003623 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624 int cased;
3625
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626 /* Shortcut for single character strings */
3627 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003628 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003630 /* Special case for empty strings */
3631 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003632 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003633
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634 e = p + PyString_GET_SIZE(self);
3635 cased = 0;
3636 for (; p < e; p++) {
3637 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003638 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639 else if (!cased && islower(*p))
3640 cased = 1;
3641 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643}
3644
3645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003646PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003647"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003649Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003650at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651
3652static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003653string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654{
Fred Drakeba096332000-07-09 07:04:36 +00003655 register const unsigned char *p
3656 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003657 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658 int cased;
3659
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660 /* Shortcut for single character strings */
3661 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003662 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003664 /* Special case for empty strings */
3665 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003666 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003667
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 e = p + PyString_GET_SIZE(self);
3669 cased = 0;
3670 for (; p < e; p++) {
3671 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673 else if (!cased && isupper(*p))
3674 cased = 1;
3675 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677}
3678
3679
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003680PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003681"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003682\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003683Return True if S is a titlecased string and there is at least one\n\
3684character in S, i.e. uppercase characters may only follow uncased\n\
3685characters and lowercase characters only cased ones. Return False\n\
3686otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687
3688static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003689string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003690{
Fred Drakeba096332000-07-09 07:04:36 +00003691 register const unsigned char *p
3692 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003693 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694 int cased, previous_is_cased;
3695
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696 /* Shortcut for single character strings */
3697 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003698 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003700 /* Special case for empty strings */
3701 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003702 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003703
Guido van Rossum4c08d552000-03-10 22:55:18 +00003704 e = p + PyString_GET_SIZE(self);
3705 cased = 0;
3706 previous_is_cased = 0;
3707 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003708 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003709
3710 if (isupper(ch)) {
3711 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003712 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003713 previous_is_cased = 1;
3714 cased = 1;
3715 }
3716 else if (islower(ch)) {
3717 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719 previous_is_cased = 1;
3720 cased = 1;
3721 }
3722 else
3723 previous_is_cased = 0;
3724 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003725 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726}
3727
3728
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003729PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003730"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731\n\
3732Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003733Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003734is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735
Guido van Rossum4c08d552000-03-10 22:55:18 +00003736static PyObject*
3737string_splitlines(PyStringObject *self, PyObject *args)
3738{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003739 register Py_ssize_t i;
3740 register Py_ssize_t j;
3741 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003742 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743 PyObject *list;
3744 PyObject *str;
3745 char *data;
3746
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003747 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 return NULL;
3749
3750 data = PyString_AS_STRING(self);
3751 len = PyString_GET_SIZE(self);
3752
Thomas Wouters477c8d52006-05-27 19:21:47 +00003753 /* This does not use the preallocated list because splitlines is
3754 usually run with hundreds of newlines. The overhead of
3755 switching between PyList_SET_ITEM and append causes about a
3756 2-3% slowdown for that common case. A smarter implementation
3757 could move the if check out, so the SET_ITEMs are done first
3758 and the appends only done when the prealloc buffer is full.
3759 That's too much work for little gain.*/
3760
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761 list = PyList_New(0);
3762 if (!list)
3763 goto onError;
3764
3765 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003766 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003767
Guido van Rossum4c08d552000-03-10 22:55:18 +00003768 /* Find a line and append it */
3769 while (i < len && data[i] != '\n' && data[i] != '\r')
3770 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771
3772 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003773 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774 if (i < len) {
3775 if (data[i] == '\r' && i + 1 < len &&
3776 data[i+1] == '\n')
3777 i += 2;
3778 else
3779 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003780 if (keepends)
3781 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003783 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784 j = i;
3785 }
3786 if (j < len) {
3787 SPLIT_APPEND(data, j, len);
3788 }
3789
3790 return list;
3791
3792 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003793 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794 return NULL;
3795}
3796
3797#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003798#undef SPLIT_ADD
3799#undef MAX_PREALLOC
3800#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003801
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003802static PyObject *
3803string_getnewargs(PyStringObject *v)
3804{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003805 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003806}
3807
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003808
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003809static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003810string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003811 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3812 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003813 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003814 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3815 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003816 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3817 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3818 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3819 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3820 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3821 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3822 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003823 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3824 capitalize__doc__},
3825 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3826 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3827 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003828 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003829 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3830 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3831 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3832 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3833 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3834 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3835 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003836 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3837 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003838 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3839 startswith__doc__},
3840 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3841 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3842 swapcase__doc__},
3843 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3844 translate__doc__},
3845 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3846 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3847 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3848 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3849 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3850 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3851 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3852 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3853 expandtabs__doc__},
3854 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3855 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003856 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003857 {NULL, NULL} /* sentinel */
3858};
3859
Jeremy Hylton938ace62002-07-17 16:30:39 +00003860static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003861str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3862
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003863static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003864string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003865{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003866 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003867 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003868
Guido van Rossumae960af2001-08-30 03:11:59 +00003869 if (type != &PyString_Type)
3870 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003871 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003872 return NULL;
3873 if (x == NULL)
3874 return PyString_FromString("");
3875 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003876}
3877
Guido van Rossumae960af2001-08-30 03:11:59 +00003878static PyObject *
3879str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3880{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003881 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003882 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003883
3884 assert(PyType_IsSubtype(type, &PyString_Type));
3885 tmp = string_new(&PyString_Type, args, kwds);
3886 if (tmp == NULL)
3887 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003888 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003889 n = PyString_GET_SIZE(tmp);
3890 pnew = type->tp_alloc(type, n);
3891 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003892 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003893 ((PyStringObject *)pnew)->ob_shash =
3894 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003895 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003896 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003897 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003898 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003899}
3900
Guido van Rossumcacfc072002-05-24 19:01:59 +00003901static PyObject *
3902basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3903{
3904 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003905 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003906 return NULL;
3907}
3908
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003909static PyObject *
3910string_mod(PyObject *v, PyObject *w)
3911{
3912 if (!PyString_Check(v)) {
3913 Py_INCREF(Py_NotImplemented);
3914 return Py_NotImplemented;
3915 }
3916 return PyString_Format(v, w);
3917}
3918
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003919PyDoc_STRVAR(basestring_doc,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003920"Type basestring cannot be instantiated; it is the base for str8 and str.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003921
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003922static PyNumberMethods string_as_number = {
3923 0, /*nb_add*/
3924 0, /*nb_subtract*/
3925 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003926 string_mod, /*nb_remainder*/
3927};
3928
3929
Guido van Rossumcacfc072002-05-24 19:01:59 +00003930PyTypeObject PyBaseString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003931 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003932 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003933 0,
3934 0,
3935 0, /* tp_dealloc */
3936 0, /* tp_print */
3937 0, /* tp_getattr */
3938 0, /* tp_setattr */
3939 0, /* tp_compare */
3940 0, /* tp_repr */
3941 0, /* tp_as_number */
3942 0, /* tp_as_sequence */
3943 0, /* tp_as_mapping */
3944 0, /* tp_hash */
3945 0, /* tp_call */
3946 0, /* tp_str */
3947 0, /* tp_getattro */
3948 0, /* tp_setattro */
3949 0, /* tp_as_buffer */
3950 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3951 basestring_doc, /* tp_doc */
3952 0, /* tp_traverse */
3953 0, /* tp_clear */
3954 0, /* tp_richcompare */
3955 0, /* tp_weaklistoffset */
3956 0, /* tp_iter */
3957 0, /* tp_iternext */
3958 0, /* tp_methods */
3959 0, /* tp_members */
3960 0, /* tp_getset */
3961 &PyBaseObject_Type, /* tp_base */
3962 0, /* tp_dict */
3963 0, /* tp_descr_get */
3964 0, /* tp_descr_set */
3965 0, /* tp_dictoffset */
3966 0, /* tp_init */
3967 0, /* tp_alloc */
3968 basestring_new, /* tp_new */
3969 0, /* tp_free */
3970};
3971
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003972PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003973"str(object) -> string\n\
3974\n\
3975Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003976If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003977
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003978static PyObject *str_iter(PyObject *seq);
3979
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003980PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003981 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00003982 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003983 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003984 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003985 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003986 (printfunc)string_print, /* tp_print */
3987 0, /* tp_getattr */
3988 0, /* tp_setattr */
3989 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003990 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003991 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003992 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003993 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003994 (hashfunc)string_hash, /* tp_hash */
3995 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003996 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003997 PyObject_GenericGetAttr, /* tp_getattro */
3998 0, /* tp_setattro */
3999 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00004000 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
4001 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004002 string_doc, /* tp_doc */
4003 0, /* tp_traverse */
4004 0, /* tp_clear */
4005 (richcmpfunc)string_richcompare, /* tp_richcompare */
4006 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004007 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004008 0, /* tp_iternext */
4009 string_methods, /* tp_methods */
4010 0, /* tp_members */
4011 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004012 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004013 0, /* tp_dict */
4014 0, /* tp_descr_get */
4015 0, /* tp_descr_set */
4016 0, /* tp_dictoffset */
4017 0, /* tp_init */
4018 0, /* tp_alloc */
4019 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004020 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004021};
4022
4023void
Fred Drakeba096332000-07-09 07:04:36 +00004024PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004025{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004026 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004027 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004028 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004029 if (w == NULL || !PyString_Check(*pv)) {
4030 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004031 *pv = NULL;
4032 return;
4033 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004034 v = string_concat((PyStringObject *) *pv, w);
4035 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004036 *pv = v;
4037}
4038
Guido van Rossum013142a1994-08-30 08:19:36 +00004039void
Fred Drakeba096332000-07-09 07:04:36 +00004040PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004041{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004042 PyString_Concat(pv, w);
4043 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004044}
4045
4046
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004047/* The following function breaks the notion that strings are immutable:
4048 it changes the size of a string. We get away with this only if there
4049 is only one module referencing the object. You can also think of it
4050 as creating a new string object and destroying the old one, only
4051 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004052 already be known to some other part of the code...
4053 Note that if there's not enough memory to resize the string, the original
4054 string object at *pv is deallocated, *pv is set to NULL, an "out of
4055 memory" exception is set, and -1 is returned. Else (on success) 0 is
4056 returned, and the value in *pv may or may not be the same as on input.
4057 As always, an extra byte is allocated for a trailing \0 byte (newsize
4058 does *not* include that), and a trailing \0 byte is stored.
4059*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004060
4061int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004062_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004063{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004064 register PyObject *v;
4065 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004066 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004067 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004068 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004069 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004070 Py_DECREF(v);
4071 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004072 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004073 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004074 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004075 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004076 _Py_ForgetReference(v);
4077 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004078 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004079 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004080 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004081 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004082 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004083 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004084 _Py_NewReference(*pv);
4085 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004086 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004087 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004088 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089 return 0;
4090}
Guido van Rossume5372401993-03-16 12:15:04 +00004091
4092/* Helpers for formatstring */
4093
Thomas Wouters477c8d52006-05-27 19:21:47 +00004094Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004095getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004096{
Thomas Wouters977485d2006-02-16 15:59:12 +00004097 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004098 if (argidx < arglen) {
4099 (*p_argidx)++;
4100 if (arglen < 0)
4101 return args;
4102 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004103 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004104 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004105 PyErr_SetString(PyExc_TypeError,
4106 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004107 return NULL;
4108}
4109
/* Bit masks recording which flag characters appeared in a format
 * specifier.  Each flag character maps to one bit:
 *
 *	'-'  ->  F_LJUST
 *	'+'  ->  F_SIGN
 *	' '  ->  F_BLANK
 *	'#'  ->  F_ALT
 *	'0'  ->  F_ZERO
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
4122
Thomas Wouters477c8d52006-05-27 19:21:47 +00004123Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004124formatfloat(char *buf, size_t buflen, int flags,
4125 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004126{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004127 /* fmt = '%#.' + `prec` + `type`
4128 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004129 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004130 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004131 x = PyFloat_AsDouble(v);
4132 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004133 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004134 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004135 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004136 }
Guido van Rossume5372401993-03-16 12:15:04 +00004137 if (prec < 0)
4138 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004139 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4140 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004141 /* Worst case length calc to ensure no buffer overrun:
4142
4143 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004144 fmt = %#.<prec>g
4145 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004146 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004147 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004148
4149 'f' formats:
4150 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4151 len = 1 + 50 + 1 + prec = 52 + prec
4152
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004153 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004154 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004155
4156 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00004157 if (((type == 'g' || type == 'G') &&
4158 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004159 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004160 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004161 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004162 return -1;
4163 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004164 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4165 (flags&F_ALT) ? "#" : "",
4166 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004167 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004168 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004169}
4170
Tim Peters38fd5b62000-09-21 05:43:11 +00004171/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4172 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4173 * Python's regular ints.
4174 * Return value: a new PyString*, or NULL if error.
4175 * . *pbuf is set to point into it,
4176 * *plen set to the # of chars following that.
4177 * Caller must decref it when done using pbuf.
4178 * The string starting at *pbuf is of the form
4179 * "-"? ("0x" | "0X")? digit+
4180 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004181 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004182 * There will be at least prec digits, zero-filled on the left if
4183 * necessary to get that many.
4184 * val object to be converted
4185 * flags bitmask of format flags; only F_ALT is looked at
4186 * prec minimum number of digits; 0-fill on left if needed
4187 * type a character in [duoxX]; u acts the same as d
4188 *
4189 * CAUTION: o, x and X conversions on regular ints can never
4190 * produce a '-' sign, but can for Python's unbounded ints.
4191 */
4192PyObject*
4193_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4194 char **pbuf, int *plen)
4195{
4196 PyObject *result = NULL;
4197 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004198 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004199 int sign; /* 1 if '-', else 0 */
4200 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004201 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004202 int numdigits; /* len == numnondigits + numdigits */
4203 int numnondigits = 0;
4204
Guido van Rossumddefaf32007-01-14 03:31:43 +00004205 /* Avoid exceeding SSIZE_T_MAX */
4206 if (prec > PY_SSIZE_T_MAX-3) {
4207 PyErr_SetString(PyExc_OverflowError,
4208 "precision too large");
4209 return NULL;
4210 }
4211
4212
Tim Peters38fd5b62000-09-21 05:43:11 +00004213 switch (type) {
4214 case 'd':
4215 case 'u':
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004216 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004217 break;
4218 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004219 numnondigits = 2;
4220 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00004221 break;
4222 case 'x':
4223 case 'X':
4224 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004225 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00004226 break;
4227 default:
4228 assert(!"'type' not in [duoxX]");
4229 }
4230 if (!result)
4231 return NULL;
4232
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004233 buf = PyString_AsString(result);
4234 if (!buf) {
4235 Py_DECREF(result);
4236 return NULL;
4237 }
4238
Tim Peters38fd5b62000-09-21 05:43:11 +00004239 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004240 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004241 PyErr_BadInternalCall();
4242 return NULL;
4243 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004244 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004245 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004246 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4247 return NULL;
4248 }
4249 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004250 if (buf[len-1] == 'L') {
4251 --len;
4252 buf[len] = '\0';
4253 }
4254 sign = buf[0] == '-';
4255 numnondigits += sign;
4256 numdigits = len - numnondigits;
4257 assert(numdigits > 0);
4258
Tim Petersfff53252001-04-12 18:38:48 +00004259 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004260 if (((flags & F_ALT) == 0 &&
4261 (type == 'o' || type == 'x' || type == 'X'))) {
4262 assert(buf[sign] == '0');
4263 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
4264 buf[sign+1] == 'o');
4265 numnondigits -= 2;
4266 buf += 2;
4267 len -= 2;
4268 if (sign)
4269 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00004270 assert(len == numnondigits + numdigits);
4271 assert(numdigits > 0);
4272 }
4273
4274 /* Fill with leading zeroes to meet minimum width. */
4275 if (prec > numdigits) {
4276 PyObject *r1 = PyString_FromStringAndSize(NULL,
4277 numnondigits + prec);
4278 char *b1;
4279 if (!r1) {
4280 Py_DECREF(result);
4281 return NULL;
4282 }
4283 b1 = PyString_AS_STRING(r1);
4284 for (i = 0; i < numnondigits; ++i)
4285 *b1++ = *buf++;
4286 for (i = 0; i < prec - numdigits; i++)
4287 *b1++ = '0';
4288 for (i = 0; i < numdigits; i++)
4289 *b1++ = *buf++;
4290 *b1 = '\0';
4291 Py_DECREF(result);
4292 result = r1;
4293 buf = PyString_AS_STRING(result);
4294 len = numnondigits + prec;
4295 }
4296
4297 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004298 if (type == 'X') {
4299 /* Need to convert all lower case letters to upper case.
4300 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004301 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004302 if (buf[i] >= 'a' && buf[i] <= 'x')
4303 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004304 }
4305 *pbuf = buf;
4306 *plen = len;
4307 return result;
4308}
4309
Thomas Wouters477c8d52006-05-27 19:21:47 +00004310Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004311formatint(char *buf, size_t buflen, int flags,
4312 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004313{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004314 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004315 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4316 + 1 + 1 = 24 */
4317 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004318 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004319 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004320
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004321 x = PyInt_AsLong(v);
4322 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004323 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004324 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004325 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004326 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004327 if (x < 0 && type == 'u') {
4328 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004329 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004330 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4331 sign = "-";
4332 else
4333 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004334 if (prec < 0)
4335 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004336
4337 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004338 (type == 'x' || type == 'X' || type == 'o')) {
4339 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004340 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004341 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004342 * - when 0 is being converted, the C standard leaves off
4343 * the '0x' or '0X', which is inconsistent with other
4344 * %#x/%#X conversions and inconsistent with Python's
4345 * hex() function
4346 * - there are platforms that violate the standard and
4347 * convert 0 with the '0x' or '0X'
4348 * (Metrowerks, Compaq Tru64)
4349 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004350 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004351 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004352 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004353 * We can achieve the desired consistency by inserting our
4354 * own '0x' or '0X' prefix, and substituting %x/%X in place
4355 * of %#x/%#X.
4356 *
4357 * Note that this is the same approach as used in
4358 * formatint() in unicodeobject.c
4359 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004360 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4361 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004362 }
4363 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004364 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4365 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004366 prec, type);
4367 }
4368
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004369 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004370 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004371 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004372 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004373 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004374 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004375 return -1;
4376 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004377 if (sign[0])
4378 PyOS_snprintf(buf, buflen, fmt, -x);
4379 else
4380 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004381 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004382}
4383
Thomas Wouters477c8d52006-05-27 19:21:47 +00004384Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004385formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004386{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004387 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004388 if (PyString_Check(v)) {
4389 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004390 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004391 }
4392 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004393 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004394 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004395 }
4396 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004397 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004398}
4399
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004409
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004410PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004411PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004412{
4413 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004414 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004415 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004416 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004417 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004418 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004419 PyObject *dict = NULL;
4420 if (format == NULL || !PyString_Check(format) || args == NULL) {
4421 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004422 return NULL;
4423 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004424 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004425 fmt = PyString_AS_STRING(format);
4426 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004427 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004428 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004429 if (result == NULL)
4430 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004431 res = PyString_AsString(result);
4432 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004433 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004434 argidx = 0;
4435 }
4436 else {
4437 arglen = -1;
4438 argidx = -2;
4439 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004440 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004441 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004442 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004443 while (--fmtcnt >= 0) {
4444 if (*fmt != '%') {
4445 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004446 rescnt = fmtcnt + 100;
4447 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004448 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004449 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004450 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004451 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004452 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004453 }
4454 *res++ = *fmt++;
4455 }
4456 else {
4457 /* Got a format specifier */
4458 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004459 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004460 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004461 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004462 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004463 PyObject *v = NULL;
4464 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004465 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004466 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004467 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004468 char formatbuf[FORMATBUFLEN];
4469 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004470 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004471 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004472
Guido van Rossumda9c2711996-12-05 21:58:58 +00004473 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004474 if (*fmt == '(') {
4475 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004476 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004477 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004478 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004479
4480 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004481 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004482 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004483 goto error;
4484 }
4485 ++fmt;
4486 --fmtcnt;
4487 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004488 /* Skip over balanced parentheses */
4489 while (pcount > 0 && --fmtcnt >= 0) {
4490 if (*fmt == ')')
4491 --pcount;
4492 else if (*fmt == '(')
4493 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004494 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004495 }
4496 keylen = fmt - keystart - 1;
4497 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004498 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004499 "incomplete format key");
4500 goto error;
4501 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004502 key = PyString_FromStringAndSize(keystart,
4503 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004504 if (key == NULL)
4505 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004506 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004507 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004508 args_owned = 0;
4509 }
4510 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004512 if (args == NULL) {
4513 goto error;
4514 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004515 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004516 arglen = -1;
4517 argidx = -2;
4518 }
Guido van Rossume5372401993-03-16 12:15:04 +00004519 while (--fmtcnt >= 0) {
4520 switch (c = *fmt++) {
4521 case '-': flags |= F_LJUST; continue;
4522 case '+': flags |= F_SIGN; continue;
4523 case ' ': flags |= F_BLANK; continue;
4524 case '#': flags |= F_ALT; continue;
4525 case '0': flags |= F_ZERO; continue;
4526 }
4527 break;
4528 }
4529 if (c == '*') {
4530 v = getnextarg(args, arglen, &argidx);
4531 if (v == NULL)
4532 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 if (!PyInt_Check(v)) {
4534 PyErr_SetString(PyExc_TypeError,
4535 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004536 goto error;
4537 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004538 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004539 if (width == -1 && PyErr_Occurred())
4540 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004541 if (width < 0) {
4542 flags |= F_LJUST;
4543 width = -width;
4544 }
Guido van Rossume5372401993-03-16 12:15:04 +00004545 if (--fmtcnt >= 0)
4546 c = *fmt++;
4547 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004548 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004549 width = c - '0';
4550 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004551 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004552 if (!isdigit(c))
4553 break;
4554 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004555 PyErr_SetString(
4556 PyExc_ValueError,
4557 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004558 goto error;
4559 }
4560 width = width*10 + (c - '0');
4561 }
4562 }
4563 if (c == '.') {
4564 prec = 0;
4565 if (--fmtcnt >= 0)
4566 c = *fmt++;
4567 if (c == '*') {
4568 v = getnextarg(args, arglen, &argidx);
4569 if (v == NULL)
4570 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004571 if (!PyInt_Check(v)) {
4572 PyErr_SetString(
4573 PyExc_TypeError,
4574 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004575 goto error;
4576 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004577 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004578 if (prec == -1 && PyErr_Occurred())
4579 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004580 if (prec < 0)
4581 prec = 0;
4582 if (--fmtcnt >= 0)
4583 c = *fmt++;
4584 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004585 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004586 prec = c - '0';
4587 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004588 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004589 if (!isdigit(c))
4590 break;
4591 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004592 PyErr_SetString(
4593 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004594 "prec too big");
4595 goto error;
4596 }
4597 prec = prec*10 + (c - '0');
4598 }
4599 }
4600 } /* prec */
4601 if (fmtcnt >= 0) {
4602 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004603 if (--fmtcnt >= 0)
4604 c = *fmt++;
4605 }
4606 }
4607 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004608 PyErr_SetString(PyExc_ValueError,
4609 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004610 goto error;
4611 }
4612 if (c != '%') {
4613 v = getnextarg(args, arglen, &argidx);
4614 if (v == NULL)
4615 goto error;
4616 }
4617 sign = 0;
4618 fill = ' ';
4619 switch (c) {
4620 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004621 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004622 len = 1;
4623 break;
4624 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004625 if (PyUnicode_Check(v)) {
4626 fmt = fmt_start;
4627 argidx = argidx_start;
4628 goto unicode;
4629 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004630 temp = _PyObject_Str(v);
4631 if (temp != NULL && PyUnicode_Check(temp)) {
4632 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004633 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004634 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004635 goto unicode;
4636 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004637 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004638 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004639 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004640 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004641 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004642 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004643 if (!PyString_Check(temp)) {
4644 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004645 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004646 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004647 goto error;
4648 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004649 pbuf = PyString_AS_STRING(temp);
4650 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004651 if (prec >= 0 && len > prec)
4652 len = prec;
4653 break;
4654 case 'i':
4655 case 'd':
4656 case 'u':
4657 case 'o':
4658 case 'x':
4659 case 'X':
4660 if (c == 'i')
4661 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004662 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004663 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004664 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004665 prec, c, &pbuf, &ilen);
4666 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004667 if (!temp)
4668 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004669 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004670 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004671 else {
4672 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004673 len = formatint(pbuf,
4674 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004675 flags, prec, c, v);
4676 if (len < 0)
4677 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004678 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004679 }
4680 if (flags & F_ZERO)
4681 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004682 break;
4683 case 'e':
4684 case 'E':
4685 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004686 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004687 case 'g':
4688 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004689 if (c == 'F')
4690 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004691 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004692 len = formatfloat(pbuf, sizeof(formatbuf),
4693 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004694 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004695 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004696 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004697 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004698 fill = '0';
4699 break;
4700 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004701 if (PyUnicode_Check(v)) {
4702 fmt = fmt_start;
4703 argidx = argidx_start;
4704 goto unicode;
4705 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004706 pbuf = formatbuf;
4707 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004708 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004709 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004710 break;
4711 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004712 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004713 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004714 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004715 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004716 (Py_ssize_t)(fmt - 1 -
4717 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004718 goto error;
4719 }
4720 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004721 if (*pbuf == '-' || *pbuf == '+') {
4722 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004723 len--;
4724 }
4725 else if (flags & F_SIGN)
4726 sign = '+';
4727 else if (flags & F_BLANK)
4728 sign = ' ';
4729 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004730 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004731 }
4732 if (width < len)
4733 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004734 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004735 reslen -= rescnt;
4736 rescnt = width + fmtcnt + 100;
4737 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004738 if (reslen < 0) {
4739 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004740 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004741 return PyErr_NoMemory();
4742 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004743 if (_PyString_Resize(&result, reslen) < 0) {
4744 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004745 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004746 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004747 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004748 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004749 }
4750 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004751 if (fill != ' ')
4752 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004753 rescnt--;
4754 if (width > len)
4755 width--;
4756 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004757 if ((flags & F_ALT) &&
4758 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004759 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004760 assert(pbuf[1] == c);
4761 if (fill != ' ') {
4762 *res++ = *pbuf++;
4763 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004764 }
Tim Petersfff53252001-04-12 18:38:48 +00004765 rescnt -= 2;
4766 width -= 2;
4767 if (width < 0)
4768 width = 0;
4769 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004770 }
4771 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004772 do {
4773 --rescnt;
4774 *res++ = fill;
4775 } while (--width > len);
4776 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004777 if (fill == ' ') {
4778 if (sign)
4779 *res++ = sign;
4780 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004781 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004782 assert(pbuf[0] == '0');
4783 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004784 *res++ = *pbuf++;
4785 *res++ = *pbuf++;
4786 }
4787 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004788 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004789 res += len;
4790 rescnt -= len;
4791 while (--width >= len) {
4792 --rescnt;
4793 *res++ = ' ';
4794 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004795 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004796 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004797 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004798 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004799 goto error;
4800 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004801 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004802 } /* '%' */
4803 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004804 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004805 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004806 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004807 goto error;
4808 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004809 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004810 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004811 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004812 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004813 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004814
4815 unicode:
4816 if (args_owned) {
4817 Py_DECREF(args);
4818 args_owned = 0;
4819 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004820 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004821 if (PyTuple_Check(orig_args) && argidx > 0) {
4822 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004823 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004824 v = PyTuple_New(n);
4825 if (v == NULL)
4826 goto error;
4827 while (--n >= 0) {
4828 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4829 Py_INCREF(w);
4830 PyTuple_SET_ITEM(v, n, w);
4831 }
4832 args = v;
4833 } else {
4834 Py_INCREF(orig_args);
4835 args = orig_args;
4836 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004837 args_owned = 1;
4838 /* Take what we have of the result and let the Unicode formatting
4839 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004840 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004841 if (_PyString_Resize(&result, rescnt))
4842 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004843 fmtcnt = PyString_GET_SIZE(format) - \
4844 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004845 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4846 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004847 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004848 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004849 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004850 if (v == NULL)
4851 goto error;
4852 /* Paste what we have (result) to what the Unicode formatting
4853 function returned (v) and return the result (or error) */
4854 w = PyUnicode_Concat(result, v);
4855 Py_DECREF(result);
4856 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004857 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004858 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004859
Guido van Rossume5372401993-03-16 12:15:04 +00004860 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004861 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004862 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004863 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004864 }
Guido van Rossume5372401993-03-16 12:15:04 +00004865 return NULL;
4866}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004867
Guido van Rossum2a61e741997-01-18 07:55:05 +00004868void
Fred Drakeba096332000-07-09 07:04:36 +00004869PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004870{
4871 register PyStringObject *s = (PyStringObject *)(*p);
4872 PyObject *t;
4873 if (s == NULL || !PyString_Check(s))
4874 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004875 /* If it's a string subclass, we don't really know what putting
4876 it in the interned dict might do. */
4877 if (!PyString_CheckExact(s))
4878 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004879 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004880 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004881 if (interned == NULL) {
4882 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004883 if (interned == NULL) {
4884 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004885 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004886 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004887 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004888 t = PyDict_GetItem(interned, (PyObject *)s);
4889 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004890 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004891 Py_DECREF(*p);
4892 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004893 return;
4894 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004895
Armin Rigo79f7ad22004-08-07 19:27:39 +00004896 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004897 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004898 return;
4899 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004900 /* The two references in interned are not counted by refcnt.
4901 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004902 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004903 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004904}
4905
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004906void
4907PyString_InternImmortal(PyObject **p)
4908{
4909 PyString_InternInPlace(p);
4910 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4911 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4912 Py_INCREF(*p);
4913 }
4914}
4915
Guido van Rossum2a61e741997-01-18 07:55:05 +00004916
4917PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004918PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004919{
4920 PyObject *s = PyString_FromString(cp);
4921 if (s == NULL)
4922 return NULL;
4923 PyString_InternInPlace(&s);
4924 return s;
4925}
4926
Guido van Rossum8cf04761997-08-02 02:57:45 +00004927void
Fred Drakeba096332000-07-09 07:04:36 +00004928PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004929{
4930 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004931 for (i = 0; i < UCHAR_MAX + 1; i++) {
4932 Py_XDECREF(characters[i]);
4933 characters[i] = NULL;
4934 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004935 Py_XDECREF(nullstring);
4936 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004937}
Barry Warsawa903ad982001-02-23 16:40:48 +00004938
Barry Warsawa903ad982001-02-23 16:40:48 +00004939void _Py_ReleaseInternedStrings(void)
4940{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004941 PyObject *keys;
4942 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004943 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004944 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004945
4946 if (interned == NULL || !PyDict_Check(interned))
4947 return;
4948 keys = PyDict_Keys(interned);
4949 if (keys == NULL || !PyList_Check(keys)) {
4950 PyErr_Clear();
4951 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004952 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004953
4954 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4955 detector, interned strings are not forcibly deallocated; rather, we
4956 give them their stolen references back, and then clear and DECREF
4957 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004958
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004959 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00004960 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4961 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004962 for (i = 0; i < n; i++) {
4963 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4964 switch (s->ob_sstate) {
4965 case SSTATE_NOT_INTERNED:
4966 /* XXX Shouldn't happen */
4967 break;
4968 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004969 Py_Refcnt(s) += 1;
4970 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004971 break;
4972 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004973 Py_Refcnt(s) += 2;
4974 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004975 break;
4976 default:
4977 Py_FatalError("Inconsistent interned string state.");
4978 }
4979 s->ob_sstate = SSTATE_NOT_INTERNED;
4980 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00004981 fprintf(stderr, "total size of all interned strings: "
4982 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4983 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004984 Py_DECREF(keys);
4985 PyDict_Clear(interned);
4986 Py_DECREF(interned);
4987 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004988}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004989
4990
4991/*********************** Str Iterator ****************************/
4992
4993typedef struct {
4994 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00004995 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004996 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
4997} striterobject;
4998
4999static void
5000striter_dealloc(striterobject *it)
5001{
5002 _PyObject_GC_UNTRACK(it);
5003 Py_XDECREF(it->it_seq);
5004 PyObject_GC_Del(it);
5005}
5006
5007static int
5008striter_traverse(striterobject *it, visitproc visit, void *arg)
5009{
5010 Py_VISIT(it->it_seq);
5011 return 0;
5012}
5013
5014static PyObject *
5015striter_next(striterobject *it)
5016{
5017 PyStringObject *seq;
5018 PyObject *item;
5019
5020 assert(it != NULL);
5021 seq = it->it_seq;
5022 if (seq == NULL)
5023 return NULL;
5024 assert(PyString_Check(seq));
5025
5026 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005027 item = PyString_FromStringAndSize(
5028 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005029 if (item != NULL)
5030 ++it->it_index;
5031 return item;
5032 }
5033
5034 Py_DECREF(seq);
5035 it->it_seq = NULL;
5036 return NULL;
5037}
5038
5039static PyObject *
5040striter_len(striterobject *it)
5041{
5042 Py_ssize_t len = 0;
5043 if (it->it_seq)
5044 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5045 return PyInt_FromSsize_t(len);
5046}
5047
Guido van Rossum49d6b072006-08-17 21:11:47 +00005048PyDoc_STRVAR(length_hint_doc,
5049 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005050
5051static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005052 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
5053 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005054 {NULL, NULL} /* sentinel */
5055};
5056
5057PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00005058 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00005059 "striterator", /* tp_name */
5060 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005061 0, /* tp_itemsize */
5062 /* methods */
5063 (destructor)striter_dealloc, /* tp_dealloc */
5064 0, /* tp_print */
5065 0, /* tp_getattr */
5066 0, /* tp_setattr */
5067 0, /* tp_compare */
5068 0, /* tp_repr */
5069 0, /* tp_as_number */
5070 0, /* tp_as_sequence */
5071 0, /* tp_as_mapping */
5072 0, /* tp_hash */
5073 0, /* tp_call */
5074 0, /* tp_str */
5075 PyObject_GenericGetAttr, /* tp_getattro */
5076 0, /* tp_setattro */
5077 0, /* tp_as_buffer */
5078 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5079 0, /* tp_doc */
5080 (traverseproc)striter_traverse, /* tp_traverse */
5081 0, /* tp_clear */
5082 0, /* tp_richcompare */
5083 0, /* tp_weaklistoffset */
5084 PyObject_SelfIter, /* tp_iter */
5085 (iternextfunc)striter_next, /* tp_iternext */
5086 striter_methods, /* tp_methods */
5087 0,
5088};
5089
5090static PyObject *
5091str_iter(PyObject *seq)
5092{
5093 striterobject *it;
5094
5095 if (!PyString_Check(seq)) {
5096 PyErr_BadInternalCall();
5097 return NULL;
5098 }
5099 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5100 if (it == NULL)
5101 return NULL;
5102 it->it_index = 0;
5103 Py_INCREF(seq);
5104 it->it_seq = (PyStringObject *)seq;
5105 _PyObject_GC_TRACK(it);
5106 return (PyObject *)it;
5107}