blob: 64be0de19999f9dd0a736bc13abbaf10a7974f48 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time the cached empty string
   (null_strings) or a cached one-character string (one_strings) is
   handed out instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by the character's value
   masked with UCHAR_MAX.  Entries are filled in lazily the first time a
   given one-character string is created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* Cached empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
/* Configuration macros defined ahead of the stringlib includes below;
   presumably these are what the stringlib headers use to specialise
   themselves for 8-bit strings -- confirm against stringlib/*.h. */

/* element type of the string data */
#define STRINGLIB_CHAR char

/* comparison, length, constructor and data-access primitives */
#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING

/* the cached empty string object declared at the top of this file */
#define STRINGLIB_EMPTY nullstring

#include "stringlib/fastsearch.h"

#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000807 char *data = op->ob_sval;
808 Py_ssize_t size = op->ob_size;
809 while (size > INT_MAX) {
810 /* Very long strings cannot be written atomically.
811 * But don't write exactly INT_MAX bytes at a time
812 * to avoid memory aligment issues.
813 */
814 const int chunk_size = INT_MAX & ~0x3FFF;
815 fwrite(data, 1, chunk_size, fp);
816 data += chunk_size;
817 size -= chunk_size;
818 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000819#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000820 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000822 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000824 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826
Thomas Wouters7e474022000-07-16 12:04:32 +0000827 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000829 if (memchr(op->ob_sval, '\'', op->ob_size) &&
830 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 quote = '"';
832
833 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 for (i = 0; i < op->ob_size; i++) {
835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000837 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\r");
844 else if (c < ' ' || c >= 0x7f)
845 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000850 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851}
852
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000853PyObject *
854PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000856 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000857 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000858 PyObject *v;
Armin Rigo7ccbca92006-10-04 12:17:45 +0000859 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyErr_SetString(PyExc_OverflowError,
861 "string is too large to make repr");
862 }
863 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000865 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 }
867 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000868 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 register char c;
870 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000871 int quote;
872
Thomas Wouters7e474022000-07-16 12:04:32 +0000873 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000875 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000876 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000877 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000878 quote = '"';
879
Tim Peters9161c8b2001-12-03 01:55:38 +0000880 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 /* There's at least enough room for a hex escape
884 and a closing quote. */
885 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000889 else if (c == '\t')
890 *p++ = '\\', *p++ = 't';
891 else if (c == '\n')
892 *p++ = '\\', *p++ = 'n';
893 else if (c == '\r')
894 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000895 else if (c < ' ' || c >= 0x7f) {
896 /* For performance, we don't want to call
897 PyOS_snprintf here (extra layers of
898 function call). */
899 sprintf(p, "\\x%02x", c & 0xff);
900 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000901 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000902 else
903 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000905 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000906 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000909 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000910 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912}
913
Guido van Rossum189f1df2001-05-01 16:51:53 +0000914static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000915string_repr(PyObject *op)
916{
917 return PyString_Repr(op, 1);
918}
919
920static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921string_str(PyObject *s)
922{
Tim Petersc9933152001-10-16 20:18:24 +0000923 assert(PyString_Check(s));
924 if (PyString_CheckExact(s)) {
925 Py_INCREF(s);
926 return s;
927 }
928 else {
929 /* Subtype -- return genuine string with the same value. */
930 PyStringObject *t = (PyStringObject *) s;
931 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
932 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000933}
934
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000936string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
938 return a->ob_size;
939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000942string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Andrew Dalke598710c2006-05-25 18:18:39 +0000944 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 register PyStringObject *op;
946 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 if (PyUnicode_Check(bb))
949 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000951 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000952 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000953 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 return NULL;
955 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000956#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000958 if ((a->ob_size == 0 || b->ob_size == 0) &&
959 PyString_CheckExact(a) && PyString_CheckExact(b)) {
960 if (a->ob_size == 0) {
961 Py_INCREF(bb);
962 return bb;
963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 Py_INCREF(a);
965 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966 }
967 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000968 if (size < 0) {
969 PyErr_SetString(PyExc_OverflowError,
970 "strings are too large to concat");
971 return NULL;
972 }
973
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000974 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000975 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000976 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000978 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000979 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000980 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000981 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
982 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000983 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985#undef b
986}
987
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000989string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000991 register Py_ssize_t i;
992 register Py_ssize_t j;
993 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000995 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 if (n < 0)
997 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000998 /* watch out for overflows: the size can overflow int,
999 * and the # of bytes needed can overflow size_t
1000 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001002 if (n && size / n != a->ob_size) {
1003 PyErr_SetString(PyExc_OverflowError,
1004 "repeated string is too long");
1005 return NULL;
1006 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001007 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 Py_INCREF(a);
1009 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010 }
Tim Peterse7c05322004-06-27 17:24:49 +00001011 nbytes = (size_t)size;
1012 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001013 PyErr_SetString(PyExc_OverflowError,
1014 "repeated string is too long");
1015 return NULL;
1016 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001017 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001018 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001019 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001021 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001022 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001023 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001024 op->ob_sval[size] = '\0';
1025 if (a->ob_size == 1 && n > 0) {
1026 memset(op->ob_sval, a->ob_sval[0] , n);
1027 return (PyObject *) op;
1028 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001029 i = 0;
1030 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001031 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001032 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001033 }
1034 while (i < size) {
1035 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001036 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001037 i += j;
1038 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040}
1041
1042/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1043
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001044static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001045string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001046 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001047 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048{
1049 if (i < 0)
1050 i = 0;
1051 if (j < 0)
1052 j = 0; /* Avoid signed/unsigned bug in next line */
1053 if (j > a->ob_size)
1054 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001055 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1056 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001057 Py_INCREF(a);
1058 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059 }
1060 if (j < i)
1061 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001062 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Guido van Rossum9284a572000-03-07 15:53:43 +00001065static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001066string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001067{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001069#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001070 if (PyUnicode_Check(sub_obj))
1071 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001072#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001074 PyErr_Format(PyExc_TypeError,
1075 "'in <string>' requires string as left operand, "
1076 "not %.200s", sub_obj->ob_type->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001077 return -1;
1078 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001079 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001080
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001082}
1083
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001084static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001085string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001087 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001088 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001089 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091 return NULL;
1092 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001093 pchar = a->ob_sval[i];
1094 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001095 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001096 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001097 else {
1098#ifdef COUNT_ALLOCS
1099 one_strings++;
1100#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001101 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001102 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001103 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001104}
1105
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106static PyObject*
1107string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001108{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001109 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001110 Py_ssize_t len_a, len_b;
1111 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001112 PyObject *result;
1113
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001114 /* Make sure both arguments are strings. */
1115 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 result = Py_NotImplemented;
1117 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001118 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001119 if (a == b) {
1120 switch (op) {
1121 case Py_EQ:case Py_LE:case Py_GE:
1122 result = Py_True;
1123 goto out;
1124 case Py_NE:case Py_LT:case Py_GT:
1125 result = Py_False;
1126 goto out;
1127 }
1128 }
1129 if (op == Py_EQ) {
1130 /* Supporting Py_NE here as well does not save
1131 much time, since Py_NE is rarely used. */
1132 if (a->ob_size == b->ob_size
1133 && (a->ob_sval[0] == b->ob_sval[0]
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001134 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001135 result = Py_True;
1136 } else {
1137 result = Py_False;
1138 }
1139 goto out;
1140 }
1141 len_a = a->ob_size; len_b = b->ob_size;
1142 min_len = (len_a < len_b) ? len_a : len_b;
1143 if (min_len > 0) {
1144 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1145 if (c==0)
1146 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001147 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001148 c = 0;
1149 if (c == 0)
1150 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1151 switch (op) {
1152 case Py_LT: c = c < 0; break;
1153 case Py_LE: c = c <= 0; break;
1154 case Py_EQ: assert(0); break; /* unreachable */
1155 case Py_NE: c = c != 0; break;
1156 case Py_GT: c = c > 0; break;
1157 case Py_GE: c = c >= 0; break;
1158 default:
1159 result = Py_NotImplemented;
1160 goto out;
1161 }
1162 result = c ? Py_True : Py_False;
1163 out:
1164 Py_INCREF(result);
1165 return result;
1166}
1167
1168int
1169_PyString_Eq(PyObject *o1, PyObject *o2)
1170{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001171 PyStringObject *a = (PyStringObject*) o1;
1172 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001173 return a->ob_size == b->ob_size
1174 && *a->ob_sval == *b->ob_sval
1175 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001176}
1177
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178static long
Fred Drakeba096332000-07-09 07:04:36 +00001179string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001181 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001182 register unsigned char *p;
1183 register long x;
1184
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001185 if (a->ob_shash != -1)
1186 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001187 len = a->ob_size;
1188 p = (unsigned char *) a->ob_sval;
1189 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001190 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001191 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001192 x ^= a->ob_size;
1193 if (x == -1)
1194 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001195 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001196 return x;
1197}
1198
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001199static PyObject*
1200string_subscript(PyStringObject* self, PyObject* item)
1201{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001202 if (PyIndex_Check(item)) {
1203 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 if (i == -1 && PyErr_Occurred())
1205 return NULL;
1206 if (i < 0)
1207 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001208 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001209 }
1210 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001211 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 char* source_buf;
1213 char* result_buf;
1214 PyObject* result;
1215
Tim Petersae1d0c92006-03-17 03:29:34 +00001216 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 PyString_GET_SIZE(self),
1218 &start, &stop, &step, &slicelength) < 0) {
1219 return NULL;
1220 }
1221
1222 if (slicelength <= 0) {
1223 return PyString_FromStringAndSize("", 0);
1224 }
1225 else {
1226 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001227 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001228 if (result_buf == NULL)
1229 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230
Tim Petersae1d0c92006-03-17 03:29:34 +00001231 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232 cur += step, i++) {
1233 result_buf[i] = source_buf[cur];
1234 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001235
1236 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001237 slicelength);
1238 PyMem_Free(result_buf);
1239 return result;
1240 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001241 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001242 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001243 PyErr_Format(PyExc_TypeError,
1244 "string indices must be integers, not %.200s",
1245 item->ob_type->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246 return NULL;
1247 }
1248}
1249
Martin v. Löwis18e16552006-02-15 17:27:45 +00001250static Py_ssize_t
1251string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252{
1253 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001254 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001255 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001256 return -1;
1257 }
1258 *ptr = (void *)self->ob_sval;
1259 return self->ob_size;
1260}
1261
Martin v. Löwis18e16552006-02-15 17:27:45 +00001262static Py_ssize_t
1263string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001264{
Guido van Rossum045e6881997-09-08 18:30:11 +00001265 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001266 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001267 return -1;
1268}
1269
Martin v. Löwis18e16552006-02-15 17:27:45 +00001270static Py_ssize_t
1271string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001272{
1273 if ( lenp )
1274 *lenp = self->ob_size;
1275 return 1;
1276}
1277
Martin v. Löwis18e16552006-02-15 17:27:45 +00001278static Py_ssize_t
1279string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001280{
1281 if ( index != 0 ) {
1282 PyErr_SetString(PyExc_SystemError,
1283 "accessing non-existent string segment");
1284 return -1;
1285 }
1286 *ptr = self->ob_sval;
1287 return self->ob_size;
1288}
1289
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001290static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001291 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001292 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (ssizeargfunc)string_repeat, /*sq_repeat*/
1294 (ssizeargfunc)string_item, /*sq_item*/
1295 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001296 0, /*sq_ass_item*/
1297 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001298 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001299};
1300
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001301static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001302 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001303 (binaryfunc)string_subscript,
1304 0,
1305};
1306
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001307static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001308 (readbufferproc)string_buffer_getreadbuf,
1309 (writebufferproc)string_buffer_getwritebuf,
1310 (segcountproc)string_buffer_getsegcount,
1311 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001312};
1313
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314
1315
/* Strip-direction selectors; they double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip direction: the format string without its
   leading "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001324
Andrew Dalke525eab32006-05-26 14:00:45 +00001325
/* True when `length` bytes of `target`, starting at `offset`, equal
   `pattern`.  The first and last bytes are compared directly before
   memcmp is called on the bytes in between.

   Don't call if length < 2.

   Every argument is parenthesized in the expansion so that expression
   arguments keep their intended grouping; arguments are still
   evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
  ((target)[(offset)] == (pattern)[0] && \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1331
1332
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for at most `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements.  The argument is
   parenthesized so that expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1345
/* Helper macros for the split functions.  They rely on names in the
   expanding function's scope: `str` (scratch PyObject *), `list` (the
   result list), `count` (number of items stored so far, SPLIT_ADD and
   FIX_PREALLOC_SIZE only) and an `onError` label. */

/* Append the substring data[left:right] to `list`; jumps to onError on
   failure.  Note: expands to several statements that are not wrapped
   in a block. */
#define SPLIT_APPEND(data, left, right)				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (PyList_Append(list, str)) {				\
		Py_DECREF(str);					\
		goto onError;					\
	}							\
	else							\
		Py_DECREF(str);

/* Store the substring data[left:right] as item number `count`: written
   straight into the preallocated slot while count < MAX_PREALLOC,
   appended afterwards.  Increments `count`; jumps to onError on
   failure. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count < MAX_PREALLOC) {				\
		PyList_SET_ITEM(list, count, str);		\
	} else {						\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		else						\
			Py_DECREF(str);				\
	}							\
	count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count

/* Advance `i` forward over whitespace / non-whitespace, stopping at `len`. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
/* Move `i` backward over whitespace / non-whitespace, stopping below 0. */
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }

1382
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001383Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001384split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385{
Andrew Dalke525eab32006-05-26 14:00:45 +00001386 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001387 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001388 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389
1390 if (list == NULL)
1391 return NULL;
1392
Andrew Dalke02758d62006-05-26 15:21:01 +00001393 i = j = 0;
1394
1395 while (maxsplit-- > 0) {
1396 SKIP_SPACE(s, i, len);
1397 if (i==len) break;
1398 j = i; i++;
1399 SKIP_NONSPACE(s, i, len);
1400 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001402
1403 if (i < len) {
1404 /* Only occurs when maxsplit was reached */
1405 /* Skip any remaining whitespace and copy to end of string */
1406 SKIP_SPACE(s, i, len);
1407 if (i != len)
1408 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001412 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 Py_DECREF(list);
1414 return NULL;
1415}
1416
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001417Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001418split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001419{
Andrew Dalke525eab32006-05-26 14:00:45 +00001420 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001422 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423
1424 if (list == NULL)
1425 return NULL;
1426
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001427 i = j = 0;
1428 while ((j < len) && (maxcount-- > 0)) {
1429 for(; j<len; j++) {
1430 /* I found that using memchr makes no difference */
1431 if (s[j] == ch) {
1432 SPLIT_ADD(s, i, j);
1433 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001434 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001435 }
1436 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001437 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001438 if (i <= len) {
1439 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001441 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 return list;
1443
1444 onError:
1445 Py_DECREF(list);
1446 return NULL;
1447}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001449PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450"S.split([sep [,maxsplit]]) -> list of strings\n\
1451\n\
1452Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001453delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001454splits are done. If sep is not specified or is None, any\n\
1455whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456
1457static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001458string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001460 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001461 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001462 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001463 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001464#ifdef USE_FAST
1465 Py_ssize_t pos;
1466#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467
Martin v. Löwis9c830762006-04-13 08:37:17 +00001468 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001470 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001471 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474 if (PyString_Check(subobj)) {
1475 sub = PyString_AS_STRING(subobj);
1476 n = PyString_GET_SIZE(subobj);
1477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001478#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001479 else if (PyUnicode_Check(subobj))
1480 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001481#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001482 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1483 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001484
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485 if (n == 0) {
1486 PyErr_SetString(PyExc_ValueError, "empty separator");
1487 return NULL;
1488 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001489 else if (n == 1)
1490 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491
Andrew Dalke525eab32006-05-26 14:00:45 +00001492 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 if (list == NULL)
1494 return NULL;
1495
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001496#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001498 while (maxsplit-- > 0) {
1499 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1500 if (pos < 0)
1501 break;
1502 j = i+pos;
1503 SPLIT_ADD(s, i, j);
1504 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001506#else
1507 i = j = 0;
1508 while ((j+n <= len) && (maxsplit-- > 0)) {
1509 for (; j+n <= len; j++) {
1510 if (Py_STRING_MATCH(s, j, sub, n)) {
1511 SPLIT_ADD(s, i, j);
1512 i = j = j + n;
1513 break;
1514 }
1515 }
1516 }
1517#endif
1518 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001519 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520 return list;
1521
Andrew Dalke525eab32006-05-26 14:00:45 +00001522 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523 Py_DECREF(list);
1524 return NULL;
1525}
1526
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001527PyDoc_STRVAR(partition__doc__,
1528"S.partition(sep) -> (head, sep, tail)\n\
1529\n\
1530Searches for the separator sep in S, and returns the part before it,\n\
1531the separator itself, and the part after it. If the separator is not\n\
1532found, returns S and two empty strings.");
1533
1534static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001535string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001536{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001537 const char *sep;
1538 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001539
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001540 if (PyString_Check(sep_obj)) {
1541 sep = PyString_AS_STRING(sep_obj);
1542 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001543 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001544#ifdef Py_USING_UNICODE
1545 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001546 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001547#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001548 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549 return NULL;
1550
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001551 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001552 (PyObject*) self,
1553 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1554 sep_obj, sep, sep_len
1555 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001556}
1557
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001558PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001559"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001560\n\
1561Searches for the separator sep in S, starting at the end of S, and returns\n\
1562the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001563separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001564
1565static PyObject *
1566string_rpartition(PyStringObject *self, PyObject *sep_obj)
1567{
1568 const char *sep;
1569 Py_ssize_t sep_len;
1570
1571 if (PyString_Check(sep_obj)) {
1572 sep = PyString_AS_STRING(sep_obj);
1573 sep_len = PyString_GET_SIZE(sep_obj);
1574 }
1575#ifdef Py_USING_UNICODE
1576 else if (PyUnicode_Check(sep_obj))
1577 return PyUnicode_Partition((PyObject *) self, sep_obj);
1578#endif
1579 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1580 return NULL;
1581
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001582 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001583 (PyObject*) self,
1584 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1585 sep_obj, sep, sep_len
1586 );
1587}
1588
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001589Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001590rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001591{
Andrew Dalke525eab32006-05-26 14:00:45 +00001592 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001593 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001594 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001595
1596 if (list == NULL)
1597 return NULL;
1598
Andrew Dalke02758d62006-05-26 15:21:01 +00001599 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001600
Andrew Dalke02758d62006-05-26 15:21:01 +00001601 while (maxsplit-- > 0) {
1602 RSKIP_SPACE(s, i);
1603 if (i<0) break;
1604 j = i; i--;
1605 RSKIP_NONSPACE(s, i);
1606 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001607 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001608 if (i >= 0) {
1609 /* Only occurs when maxsplit was reached */
1610 /* Skip any remaining whitespace and copy to beginning of string */
1611 RSKIP_SPACE(s, i);
1612 if (i >= 0)
1613 SPLIT_ADD(s, 0, i + 1);
1614
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001615 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001616 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001617 if (PyList_Reverse(list) < 0)
1618 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001619 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001620 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 Py_DECREF(list);
1622 return NULL;
1623}
1624
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001625Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001626rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001627{
Andrew Dalke525eab32006-05-26 14:00:45 +00001628 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001630 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631
1632 if (list == NULL)
1633 return NULL;
1634
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001635 i = j = len - 1;
1636 while ((i >= 0) && (maxcount-- > 0)) {
1637 for (; i >= 0; i--) {
1638 if (s[i] == ch) {
1639 SPLIT_ADD(s, i + 1, j + 1);
1640 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001642 }
1643 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001644 }
1645 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001646 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001647 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001648 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001649 if (PyList_Reverse(list) < 0)
1650 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001651 return list;
1652
1653 onError:
1654 Py_DECREF(list);
1655 return NULL;
1656}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001657
1658PyDoc_STRVAR(rsplit__doc__,
1659"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1660\n\
1661Return a list of the words in the string S, using sep as the\n\
1662delimiter string, starting at the end of the string and working\n\
1663to the front. If maxsplit is given, at most maxsplit splits are\n\
1664done. If sep is not specified or is None, any whitespace string\n\
1665is a separator.");
1666
1667static PyObject *
1668string_rsplit(PyStringObject *self, PyObject *args)
1669{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001670 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001671 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001672 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001673 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674
Martin v. Löwis9c830762006-04-13 08:37:17 +00001675 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676 return NULL;
1677 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001678 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679 if (subobj == Py_None)
1680 return rsplit_whitespace(s, len, maxsplit);
1681 if (PyString_Check(subobj)) {
1682 sub = PyString_AS_STRING(subobj);
1683 n = PyString_GET_SIZE(subobj);
1684 }
1685#ifdef Py_USING_UNICODE
1686 else if (PyUnicode_Check(subobj))
1687 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1688#endif
1689 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1690 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001691
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001692 if (n == 0) {
1693 PyErr_SetString(PyExc_ValueError, "empty separator");
1694 return NULL;
1695 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001696 else if (n == 1)
1697 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001698
Andrew Dalke525eab32006-05-26 14:00:45 +00001699 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700 if (list == NULL)
1701 return NULL;
1702
1703 j = len;
1704 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001705
1706 while ( (i >= 0) && (maxsplit-- > 0) ) {
1707 for (; i>=0; i--) {
1708 if (Py_STRING_MATCH(s, i, sub, n)) {
1709 SPLIT_ADD(s, i + n, j);
1710 j = i;
1711 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001712 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001713 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001715 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001716 SPLIT_ADD(s, 0, j);
1717 FIX_PREALLOC_SIZE(list);
1718 if (PyList_Reverse(list) < 0)
1719 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001720 return list;
1721
Andrew Dalke525eab32006-05-26 14:00:45 +00001722onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001723 Py_DECREF(list);
1724 return NULL;
1725}
1726
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001728PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729"S.join(sequence) -> string\n\
1730\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001732sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733
1734static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001735string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736{
1737 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001738 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001741 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001742 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001744 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745
Tim Peters19fe14e2001-01-19 03:03:47 +00001746 seq = PySequence_Fast(orig, "");
1747 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001748 return NULL;
1749 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001750
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001751 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 if (seqlen == 0) {
1753 Py_DECREF(seq);
1754 return PyString_FromString("");
1755 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001757 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001758 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1759 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001760 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001761 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001764
Raymond Hettinger674f2412004-08-23 23:23:54 +00001765 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001766 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 * Do a pre-pass to figure out the total amount of space we'll
1768 * need (sz), see whether any argument is absurd, and defer to
1769 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001770 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001771 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 item = PySequence_Fast_GET_ITEM(seq, i);
1774 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001775#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001777 /* Defer to Unicode join.
1778 * CAUTION: There's no gurantee that the
1779 * original sequence can be iterated over
1780 * again, so we must pass seq here.
1781 */
1782 PyObject *result;
1783 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001784 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001785 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001786 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001787#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001789 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001790 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001791 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 Py_DECREF(seq);
1793 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001794 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001795 sz += PyString_GET_SIZE(item);
1796 if (i != 0)
1797 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001798 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001800 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 Py_DECREF(seq);
1802 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001804 }
1805
1806 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001807 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001808 if (res == NULL) {
1809 Py_DECREF(seq);
1810 return NULL;
1811 }
1812
1813 /* Catenate everything. */
1814 p = PyString_AS_STRING(res);
1815 for (i = 0; i < seqlen; ++i) {
1816 size_t n;
1817 item = PySequence_Fast_GET_ITEM(seq, i);
1818 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001819 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001820 p += n;
1821 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001822 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001823 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001824 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001826
Jeremy Hylton49048292000-07-11 03:28:17 +00001827 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829}
1830
Tim Peters52e155e2001-06-16 05:42:57 +00001831PyObject *
1832_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001833{
Tim Petersa7259592001-06-16 05:11:17 +00001834 assert(sep != NULL && PyString_Check(sep));
1835 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001836 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001837}
1838
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001839Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001840string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001841{
1842 if (*end > len)
1843 *end = len;
1844 else if (*end < 0)
1845 *end += len;
1846 if (*end < 0)
1847 *end = 0;
1848 if (*start < 0)
1849 *start += len;
1850 if (*start < 0)
1851 *start = 0;
1852}
1853
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001854Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001855string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001858 const char *sub;
1859 Py_ssize_t sub_len;
1860 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001862 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1863 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864 return -2;
1865 if (PyString_Check(subobj)) {
1866 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001867 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001869#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001871 return PyUnicode_Find(
1872 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001873#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001874 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001875 /* XXX - the "expected a character buffer object" is pretty
1876 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877 return -2;
1878
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001879 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001880 return stringlib_find_slice(
1881 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1882 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001883 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001884 return stringlib_rfind_slice(
1885 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1886 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887}
1888
1889
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001890PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891"S.find(sub [,start [,end]]) -> int\n\
1892\n\
1893Return the lowest index in S where substring sub is found,\n\
1894such that sub is contained within s[start,end]. Optional\n\
1895arguments start and end are interpreted as in slice notation.\n\
1896\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001897Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898
1899static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001900string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001902 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903 if (result == -2)
1904 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906}
1907
1908
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001909PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910"S.index(sub [,start [,end]]) -> int\n\
1911\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001912Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913
1914static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001915string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001917 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918 if (result == -2)
1919 return NULL;
1920 if (result == -1) {
1921 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001922 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923 return NULL;
1924 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001925 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926}
1927
1928
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001929PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930"S.rfind(sub [,start [,end]]) -> int\n\
1931\n\
1932Return the highest index in S where substring sub is found,\n\
1933such that sub is contained within s[start,end]. Optional\n\
1934arguments start and end are interpreted as in slice notation.\n\
1935\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001936Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937
1938static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001939string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001941 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 if (result == -2)
1943 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001944 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949"S.rindex(sub [,start [,end]]) -> int\n\
1950\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001951Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952
1953static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001954string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001956 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957 if (result == -2)
1958 return NULL;
1959 if (result == -1) {
1960 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001961 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962 return NULL;
1963 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001964 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965}
1966
1967
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001968Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1970{
1971 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001972 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001973 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1975 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976
1977 i = 0;
1978 if (striptype != RIGHTSTRIP) {
1979 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1980 i++;
1981 }
1982 }
1983
1984 j = len;
1985 if (striptype != LEFTSTRIP) {
1986 do {
1987 j--;
1988 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1989 j++;
1990 }
1991
1992 if (i == 0 && j == len && PyString_CheckExact(self)) {
1993 Py_INCREF(self);
1994 return (PyObject*)self;
1995 }
1996 else
1997 return PyString_FromStringAndSize(s+i, j-i);
1998}
1999
2000
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002001Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002002do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003{
2004 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002005 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007 i = 0;
2008 if (striptype != RIGHTSTRIP) {
2009 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2010 i++;
2011 }
2012 }
2013
2014 j = len;
2015 if (striptype != LEFTSTRIP) {
2016 do {
2017 j--;
2018 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2019 j++;
2020 }
2021
Tim Peters8fa5dd02001-09-12 02:18:30 +00002022 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023 Py_INCREF(self);
2024 return (PyObject*)self;
2025 }
2026 else
2027 return PyString_FromStringAndSize(s+i, j-i);
2028}
2029
2030
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002031Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002032do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2033{
2034 PyObject *sep = NULL;
2035
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002036 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002037 return NULL;
2038
2039 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002040 if (PyString_Check(sep))
2041 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002042#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002043 else if (PyUnicode_Check(sep)) {
2044 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2045 PyObject *res;
2046 if (uniself==NULL)
2047 return NULL;
2048 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2049 striptype, sep);
2050 Py_DECREF(uniself);
2051 return res;
2052 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002053#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002054 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002056 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002058 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002060 STRIPNAME(striptype));
2061 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002062 }
2063
2064 return do_strip(self, striptype);
2065}
2066
2067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002068PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002069"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070\n\
2071Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002072whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002073If chars is given and not None, remove characters in chars instead.\n\
2074If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075
2076static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002077string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079 if (PyTuple_GET_SIZE(args) == 0)
2080 return do_strip(self, BOTHSTRIP); /* Common case */
2081 else
2082 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083}
2084
2085
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002086PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002087"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002089Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002090If chars is given and not None, remove characters in chars instead.\n\
2091If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092
2093static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002094string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096 if (PyTuple_GET_SIZE(args) == 0)
2097 return do_strip(self, LEFTSTRIP); /* Common case */
2098 else
2099 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100}
2101
2102
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002103PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002104"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002106Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002107If chars is given and not None, remove characters in chars instead.\n\
2108If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109
2110static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002111string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113 if (PyTuple_GET_SIZE(args) == 0)
2114 return do_strip(self, RIGHTSTRIP); /* Common case */
2115 else
2116 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117}
2118
2119
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002120PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121"S.lower() -> string\n\
2122\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002123Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002125/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2126#ifndef _tolower
2127#define _tolower tolower
2128#endif
2129
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002131string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002133 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002134 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002135 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002137 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002138 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002140
2141 s = PyString_AS_STRING(newobj);
2142
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002143 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002144
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002146 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002147 if (isupper(c))
2148 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002150
Anthony Baxtera6286212006-04-11 07:42:36 +00002151 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152}
2153
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002154PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155"S.upper() -> string\n\
2156\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002159#ifndef _toupper
2160#define _toupper toupper
2161#endif
2162
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002164string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002166 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002167 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002168 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002170 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173
2174 s = PyString_AS_STRING(newobj);
2175
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002176 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002177
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002179 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002180 if (islower(c))
2181 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002183
Anthony Baxtera6286212006-04-11 07:42:36 +00002184 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185}
2186
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002187PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188"S.title() -> string\n\
2189\n\
2190Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002191characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002192
2193static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002194string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195{
2196 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002197 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002198 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002199 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200
Anthony Baxtera6286212006-04-11 07:42:36 +00002201 newobj = PyString_FromStringAndSize(NULL, n);
2202 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002204 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 for (i = 0; i < n; i++) {
2206 int c = Py_CHARMASK(*s++);
2207 if (islower(c)) {
2208 if (!previous_is_cased)
2209 c = toupper(c);
2210 previous_is_cased = 1;
2211 } else if (isupper(c)) {
2212 if (previous_is_cased)
2213 c = tolower(c);
2214 previous_is_cased = 1;
2215 } else
2216 previous_is_cased = 0;
2217 *s_new++ = c;
2218 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002219 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220}
2221
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002222PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223"S.capitalize() -> string\n\
2224\n\
2225Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002226capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227
2228static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002229string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230{
2231 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002232 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002233 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234
Anthony Baxtera6286212006-04-11 07:42:36 +00002235 newobj = PyString_FromStringAndSize(NULL, n);
2236 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002238 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 if (0 < n) {
2240 int c = Py_CHARMASK(*s++);
2241 if (islower(c))
2242 *s_new = toupper(c);
2243 else
2244 *s_new = c;
2245 s_new++;
2246 }
2247 for (i = 1; i < n; i++) {
2248 int c = Py_CHARMASK(*s++);
2249 if (isupper(c))
2250 *s_new = tolower(c);
2251 else
2252 *s_new = c;
2253 s_new++;
2254 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002255 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256}
2257
2258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002259PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260"S.count(sub[, start[, end]]) -> int\n\
2261\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002262Return the number of non-overlapping occurrences of substring sub in\n\
2263string S[start:end]. Optional arguments start and end are interpreted\n\
2264as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265
2266static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002267string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002269 PyObject *sub_obj;
2270 const char *str = PyString_AS_STRING(self), *sub;
2271 Py_ssize_t sub_len;
2272 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002273
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002274 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2275 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002276 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002277
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002278 if (PyString_Check(sub_obj)) {
2279 sub = PyString_AS_STRING(sub_obj);
2280 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002282#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002283 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002284 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002286 if (count == -1)
2287 return NULL;
2288 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002289 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002290 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002291#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002292 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 return NULL;
2294
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002295 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002296
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002297 return PyInt_FromSsize_t(
2298 stringlib_count(str + start, end - start, sub, sub_len)
2299 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300}
2301
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002302PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303"S.swapcase() -> string\n\
2304\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002306converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307
2308static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002309string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310{
2311 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002312 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002313 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314
Anthony Baxtera6286212006-04-11 07:42:36 +00002315 newobj = PyString_FromStringAndSize(NULL, n);
2316 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002318 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 for (i = 0; i < n; i++) {
2320 int c = Py_CHARMASK(*s++);
2321 if (islower(c)) {
2322 *s_new = toupper(c);
2323 }
2324 else if (isupper(c)) {
2325 *s_new = tolower(c);
2326 }
2327 else
2328 *s_new = c;
2329 s_new++;
2330 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002331 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332}
2333
2334
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002335PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336"S.translate(table [,deletechars]) -> string\n\
2337\n\
2338Return a copy of the string S, where all characters occurring\n\
2339in the optional argument deletechars are removed, and the\n\
2340remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002341translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342
2343static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002344string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346 register char *input, *output;
2347 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002348 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002351 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 PyObject *result;
2353 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002354 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002356 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359
2360 if (PyString_Check(tableobj)) {
2361 table1 = PyString_AS_STRING(tableobj);
2362 tablen = PyString_GET_SIZE(tableobj);
2363 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002364#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002366 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 parameter; instead a mapping to None will cause characters
2368 to be deleted. */
2369 if (delobj != NULL) {
2370 PyErr_SetString(PyExc_TypeError,
2371 "deletions are implemented differently for unicode");
2372 return NULL;
2373 }
2374 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2375 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002376#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379
Martin v. Löwis00b61272002-12-12 20:03:19 +00002380 if (tablen != 256) {
2381 PyErr_SetString(PyExc_ValueError,
2382 "translation table must be 256 characters long");
2383 return NULL;
2384 }
2385
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386 if (delobj != NULL) {
2387 if (PyString_Check(delobj)) {
2388 del_table = PyString_AS_STRING(delobj);
2389 dellen = PyString_GET_SIZE(delobj);
2390 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002391#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392 else if (PyUnicode_Check(delobj)) {
2393 PyErr_SetString(PyExc_TypeError,
2394 "deletions are implemented differently for unicode");
2395 return NULL;
2396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2399 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400 }
2401 else {
2402 del_table = NULL;
2403 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404 }
2405
2406 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002407 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408 result = PyString_FromStringAndSize((char *)NULL, inlen);
2409 if (result == NULL)
2410 return NULL;
2411 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002412 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413
2414 if (dellen == 0) {
2415 /* If no deletions are required, use faster code */
2416 for (i = inlen; --i >= 0; ) {
2417 c = Py_CHARMASK(*input++);
2418 if (Py_CHARMASK((*output++ = table[c])) != c)
2419 changed = 1;
2420 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002421 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 return result;
2423 Py_DECREF(result);
2424 Py_INCREF(input_obj);
2425 return input_obj;
2426 }
2427
2428 for (i = 0; i < 256; i++)
2429 trans_table[i] = Py_CHARMASK(table[i]);
2430
2431 for (i = 0; i < dellen; i++)
2432 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2433
2434 for (i = inlen; --i >= 0; ) {
2435 c = Py_CHARMASK(*input++);
2436 if (trans_table[c] != -1)
2437 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2438 continue;
2439 changed = 1;
2440 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002441 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002442 Py_DECREF(result);
2443 Py_INCREF(input_obj);
2444 return input_obj;
2445 }
2446 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002447 if (inlen > 0)
2448 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 return result;
2450}
2451
2452
/* Direction flags passed as the `direction` argument of the substring
   search helpers.  REVERSE is parenthesized so that the negative
   constant stays a single operand wherever the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the target_len bytes
   starting at target, or NULL if there is none (thin wrapper around
   memchr).  Every parameter is parenthesized in the expansion. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2460
2461/* String ops must return a string. */
2462/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002463Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002464return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002465{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002466 if (PyString_CheckExact(self)) {
2467 Py_INCREF(self);
2468 return self;
2469 }
2470 return (PyStringObject *)PyString_FromStringAndSize(
2471 PyString_AS_STRING(self),
2472 PyString_GET_SIZE(self));
2473}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002474
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002475Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002476countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002477{
2478 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002479 const char *start=target;
2480 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002481
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002482 while ( (start=findchar(start, end-start, c)) != NULL ) {
2483 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002484 if (count >= maxcount)
2485 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002486 start += 1;
2487 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002488 return count;
2489}
2490
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002491Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002492findstring(const char *target, Py_ssize_t target_len,
2493 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002494 Py_ssize_t start,
2495 Py_ssize_t end,
2496 int direction)
2497{
2498 if (start < 0) {
2499 start += target_len;
2500 if (start < 0)
2501 start = 0;
2502 }
2503 if (end > target_len) {
2504 end = target_len;
2505 } else if (end < 0) {
2506 end += target_len;
2507 if (end < 0)
2508 end = 0;
2509 }
2510
2511 /* zero-length substrings always match at the first attempt */
2512 if (pattern_len == 0)
2513 return (direction > 0) ? start : end;
2514
2515 end -= pattern_len;
2516
2517 if (direction < 0) {
2518 for (; end >= start; end--)
2519 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2520 return end;
2521 } else {
2522 for (; start <= end; start++)
2523 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2524 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002525 }
2526 return -1;
2527}
2528
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002529Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002530countstring(const char *target, Py_ssize_t target_len,
2531 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002532 Py_ssize_t start,
2533 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002534 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002536 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002538 if (start < 0) {
2539 start += target_len;
2540 if (start < 0)
2541 start = 0;
2542 }
2543 if (end > target_len) {
2544 end = target_len;
2545 } else if (end < 0) {
2546 end += target_len;
2547 if (end < 0)
2548 end = 0;
2549 }
2550
2551 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002552 if (pattern_len == 0 || maxcount == 0) {
2553 if (target_len+1 < maxcount)
2554 return target_len+1;
2555 return maxcount;
2556 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002557
2558 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002560 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002561 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2562 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002563 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002564 end -= pattern_len-1;
2565 }
2566 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002567 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2569 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002570 if (--maxcount <= 0)
2571 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002572 start += pattern_len-1;
2573 }
2574 }
2575 return count;
2576}
2577
2578
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002579/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002580
2581/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002582Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002583replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002584 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002585 Py_ssize_t maxcount)
2586{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002587 char *self_s, *result_s;
2588 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002589 Py_ssize_t count, i, product;
2590 PyStringObject *result;
2591
2592 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002593
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002594 /* 1 at the end plus 1 after every character */
2595 count = self_len+1;
2596 if (maxcount < count)
2597 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002598
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002599 /* Check for overflow */
2600 /* result_len = count * to_len + self_len; */
2601 product = count * to_len;
2602 if (product / to_len != count) {
2603 PyErr_SetString(PyExc_OverflowError,
2604 "replace string is too long");
2605 return NULL;
2606 }
2607 result_len = product + self_len;
2608 if (result_len < 0) {
2609 PyErr_SetString(PyExc_OverflowError,
2610 "replace string is too long");
2611 return NULL;
2612 }
2613
2614 if (! (result = (PyStringObject *)
2615 PyString_FromStringAndSize(NULL, result_len)) )
2616 return NULL;
2617
2618 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002619 result_s = PyString_AS_STRING(result);
2620
2621 /* TODO: special case single character, which doesn't need memcpy */
2622
2623 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002624 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002625 result_s += to_len;
2626 count -= 1;
2627
2628 for (i=0; i<count; i++) {
2629 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002630 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002631 result_s += to_len;
2632 }
2633
2634 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002635 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002636
2637 return result;
2638}
2639
2640/* Special case for deleting a single character */
2641/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002642Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002643replace_delete_single_character(PyStringObject *self,
2644 char from_c, Py_ssize_t maxcount)
2645{
2646 char *self_s, *result_s;
2647 char *start, *next, *end;
2648 Py_ssize_t self_len, result_len;
2649 Py_ssize_t count;
2650 PyStringObject *result;
2651
2652 self_len = PyString_GET_SIZE(self);
2653 self_s = PyString_AS_STRING(self);
2654
Andrew Dalke51324072006-05-26 20:25:22 +00002655 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002656 if (count == 0) {
2657 return return_self(self);
2658 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002659
2660 result_len = self_len - count; /* from_len == 1 */
2661 assert(result_len>=0);
2662
2663 if ( (result = (PyStringObject *)
2664 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2665 return NULL;
2666 result_s = PyString_AS_STRING(result);
2667
2668 start = self_s;
2669 end = self_s + self_len;
2670 while (count-- > 0) {
2671 next = findchar(start, end-start, from_c);
2672 if (next == NULL)
2673 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002674 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002675 result_s += (next-start);
2676 start = next+1;
2677 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002678 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002679
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002680 return result;
2681}
2682
2683/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2684
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002685Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002686replace_delete_substring(PyStringObject *self,
2687 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002688 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002689 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002690 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002691 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002692 Py_ssize_t count, offset;
2693 PyStringObject *result;
2694
2695 self_len = PyString_GET_SIZE(self);
2696 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002697
2698 count = countstring(self_s, self_len,
2699 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002700 0, self_len, 1,
2701 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002702
2703 if (count == 0) {
2704 /* no matches */
2705 return return_self(self);
2706 }
2707
2708 result_len = self_len - (count * from_len);
2709 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002710
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002711 if ( (result = (PyStringObject *)
2712 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2713 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002714
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002715 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002716
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 start = self_s;
2718 end = self_s + self_len;
2719 while (count-- > 0) {
2720 offset = findstring(start, end-start,
2721 from_s, from_len,
2722 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002723 if (offset == -1)
2724 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002725 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002726
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002727 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002728
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002729 result_s += (next-start);
2730 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002731 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002732 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002733 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002734}
2735
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002736/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002737Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002738replace_single_character_in_place(PyStringObject *self,
2739 char from_c, char to_c,
2740 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002741{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002742 char *self_s, *result_s, *start, *end, *next;
2743 Py_ssize_t self_len;
2744 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002745
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002746 /* The result string will be the same size */
2747 self_s = PyString_AS_STRING(self);
2748 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002749
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002750 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002751
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 if (next == NULL) {
2753 /* No matches; return the original string */
2754 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002755 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002756
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002758 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002759 if (result == NULL)
2760 return NULL;
2761 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002762 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002763
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002764 /* change everything in-place, starting with this one */
2765 start = result_s + (next-self_s);
2766 *start = to_c;
2767 start++;
2768 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002769
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002770 while (--maxcount > 0) {
2771 next = findchar(start, end-start, from_c);
2772 if (next == NULL)
2773 break;
2774 *next = to_c;
2775 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002776 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002777
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002779}
2780
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002781/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002782Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002784 const char *from_s, Py_ssize_t from_len,
2785 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002786 Py_ssize_t maxcount)
2787{
2788 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002789 char *self_s;
2790 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002792
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002794
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002795 self_s = PyString_AS_STRING(self);
2796 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002797
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002798 offset = findstring(self_s, self_len,
2799 from_s, from_len,
2800 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002801 if (offset == -1) {
2802 /* No matches; return the original string */
2803 return return_self(self);
2804 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002805
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002806 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002807 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808 if (result == NULL)
2809 return NULL;
2810 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002811 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002812
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002813 /* change everything in-place, starting with this one */
2814 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002815 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816 start += from_len;
2817 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002818
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 while ( --maxcount > 0) {
2820 offset = findstring(start, end-start,
2821 from_s, from_len,
2822 0, end-start, FORWARD);
2823 if (offset==-1)
2824 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002825 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002826 start += offset+from_len;
2827 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002828
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002829 return result;
2830}
2831
2832/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002833Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002834replace_single_character(PyStringObject *self,
2835 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002836 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002837 Py_ssize_t maxcount)
2838{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002839 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002840 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002841 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842 Py_ssize_t count, product;
2843 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002844
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002845 self_s = PyString_AS_STRING(self);
2846 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002847
Andrew Dalke51324072006-05-26 20:25:22 +00002848 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002849 if (count == 0) {
2850 /* no matches, return unchanged */
2851 return return_self(self);
2852 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002853
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002854 /* use the difference between current and new, hence the "-1" */
2855 /* result_len = self_len + count * (to_len-1) */
2856 product = count * (to_len-1);
2857 if (product / (to_len-1) != count) {
2858 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2859 return NULL;
2860 }
2861 result_len = self_len + product;
2862 if (result_len < 0) {
2863 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2864 return NULL;
2865 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002866
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 if ( (result = (PyStringObject *)
2868 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2869 return NULL;
2870 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002871
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 start = self_s;
2873 end = self_s + self_len;
2874 while (count-- > 0) {
2875 next = findchar(start, end-start, from_c);
2876 if (next == NULL)
2877 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002878
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002879 if (next == start) {
2880 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002881 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002882 result_s += to_len;
2883 start += 1;
2884 } else {
2885 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002886 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002887 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002888 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002889 result_s += to_len;
2890 start = next+1;
2891 }
2892 }
2893 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002894 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002895
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002896 return result;
2897}
2898
2899/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002900Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002902 const char *from_s, Py_ssize_t from_len,
2903 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002904 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002905 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002907 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 Py_ssize_t count, offset, product;
2909 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002910
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002911 self_s = PyString_AS_STRING(self);
2912 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002913
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002914 count = countstring(self_s, self_len,
2915 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002916 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002917 if (count == 0) {
2918 /* no matches, return unchanged */
2919 return return_self(self);
2920 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002921
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002922 /* Check for overflow */
2923 /* result_len = self_len + count * (to_len-from_len) */
2924 product = count * (to_len-from_len);
2925 if (product / (to_len-from_len) != count) {
2926 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2927 return NULL;
2928 }
2929 result_len = self_len + product;
2930 if (result_len < 0) {
2931 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2932 return NULL;
2933 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002934
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002935 if ( (result = (PyStringObject *)
2936 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2937 return NULL;
2938 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002939
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002940 start = self_s;
2941 end = self_s + self_len;
2942 while (count-- > 0) {
2943 offset = findstring(start, end-start,
2944 from_s, from_len,
2945 0, end-start, FORWARD);
2946 if (offset == -1)
2947 break;
2948 next = start+offset;
2949 if (next == start) {
2950 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002951 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002952 result_s += to_len;
2953 start += from_len;
2954 } else {
2955 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002956 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002957 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002958 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959 result_s += to_len;
2960 start = next+from_len;
2961 }
2962 }
2963 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002964 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002965
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002966 return result;
2967}
2968
2969
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002970Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002971replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002972 const char *from_s, Py_ssize_t from_len,
2973 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002974 Py_ssize_t maxcount)
2975{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002976 if (maxcount < 0) {
2977 maxcount = PY_SSIZE_T_MAX;
2978 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2979 /* nothing to do; return the original string */
2980 return return_self(self);
2981 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002982
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002983 if (maxcount == 0 ||
2984 (from_len == 0 && to_len == 0)) {
2985 /* nothing to do; return the original string */
2986 return return_self(self);
2987 }
2988
2989 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002990
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002991 if (from_len == 0) {
2992 /* insert the 'to' string everywhere. */
2993 /* >>> "Python".replace("", ".") */
2994 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002995 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002996 }
2997
2998 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2999 /* point for an empty self string to generate a non-empty string */
3000 /* Special case so the remaining code always gets a non-empty string */
3001 if (PyString_GET_SIZE(self) == 0) {
3002 return return_self(self);
3003 }
3004
3005 if (to_len == 0) {
3006 /* delete all occurances of 'from' string */
3007 if (from_len == 1) {
3008 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003009 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003010 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003011 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012 }
3013 }
3014
3015 /* Handle special case where both strings have the same length */
3016
3017 if (from_len == to_len) {
3018 if (from_len == 1) {
3019 return replace_single_character_in_place(
3020 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003021 from_s[0],
3022 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003023 maxcount);
3024 } else {
3025 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003026 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003027 }
3028 }
3029
3030 /* Otherwise use the more generic algorithms */
3031 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003032 return replace_single_character(self, from_s[0],
3033 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003034 } else {
3035 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003036 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003037 }
3038}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003039
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003040PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003041"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003042\n\
3043Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003044old replaced by new. If the optional argument count is\n\
3045given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003046
3047static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003048string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003049{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003050 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003051 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003052 const char *from_s, *to_s;
3053 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003055 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003057
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003058 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003059 from_s = PyString_AS_STRING(from);
3060 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003061 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003062#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003063 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003064 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003065 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003066#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003067 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003068 return NULL;
3069
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003070 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003071 to_s = PyString_AS_STRING(to);
3072 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003073 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003074#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003075 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003076 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003078#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003079 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 return NULL;
3081
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003082 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003083 from_s, from_len,
3084 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003085}
3086
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003087/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003088
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003089/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003090 * against substr, using the start and end arguments. Returns
3091 * -1 on error, 0 if not found and 1 if found.
3092 */
3093Py_LOCAL(int)
3094_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3095 Py_ssize_t end, int direction)
3096{
3097 Py_ssize_t len = PyString_GET_SIZE(self);
3098 Py_ssize_t slen;
3099 const char* sub;
3100 const char* str;
3101
3102 if (PyString_Check(substr)) {
3103 sub = PyString_AS_STRING(substr);
3104 slen = PyString_GET_SIZE(substr);
3105 }
3106#ifdef Py_USING_UNICODE
3107 else if (PyUnicode_Check(substr))
3108 return PyUnicode_Tailmatch((PyObject *)self,
3109 substr, start, end, direction);
3110#endif
3111 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3112 return -1;
3113 str = PyString_AS_STRING(self);
3114
3115 string_adjust_indices(&start, &end, len);
3116
3117 if (direction < 0) {
3118 /* startswith */
3119 if (start+slen > len)
3120 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003121 } else {
3122 /* endswith */
3123 if (end-start < slen || start > len)
3124 return 0;
3125
3126 if (end-slen > start)
3127 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003128 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003129 if (end-start >= slen)
3130 return ! memcmp(str+start, sub, slen);
3131 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003132}
3133
3134
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003135PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003136"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003138Return True if S starts with the specified prefix, False otherwise.\n\
3139With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003140With optional end, stop comparing S at that position.\n\
3141prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003142
3143static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003144string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003145{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003146 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003147 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003148 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003149 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150
Guido van Rossumc6821402000-05-08 14:08:05 +00003151 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3152 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003153 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003154 if (PyTuple_Check(subobj)) {
3155 Py_ssize_t i;
3156 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3157 result = _string_tailmatch(self,
3158 PyTuple_GET_ITEM(subobj, i),
3159 start, end, -1);
3160 if (result == -1)
3161 return NULL;
3162 else if (result) {
3163 Py_RETURN_TRUE;
3164 }
3165 }
3166 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 }
Georg Brandl24250812006-06-09 18:45:48 +00003168 result = _string_tailmatch(self, subobj, start, end, -1);
3169 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003170 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003171 else
Georg Brandl24250812006-06-09 18:45:48 +00003172 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003173}
3174
3175
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003176PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003177"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003178\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003179Return True if S ends with the specified suffix, False otherwise.\n\
3180With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003181With optional end, stop comparing S at that position.\n\
3182suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003183
3184static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003185string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003186{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003187 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003188 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003189 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003190 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003191
Guido van Rossumc6821402000-05-08 14:08:05 +00003192 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3193 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003194 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003195 if (PyTuple_Check(subobj)) {
3196 Py_ssize_t i;
3197 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3198 result = _string_tailmatch(self,
3199 PyTuple_GET_ITEM(subobj, i),
3200 start, end, +1);
3201 if (result == -1)
3202 return NULL;
3203 else if (result) {
3204 Py_RETURN_TRUE;
3205 }
3206 }
3207 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 }
Georg Brandl24250812006-06-09 18:45:48 +00003209 result = _string_tailmatch(self, subobj, start, end, +1);
3210 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003211 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003212 else
Georg Brandl24250812006-06-09 18:45:48 +00003213 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003214}
3215
3216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003217PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003218"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003219\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003220Encodes S using the codec registered for encoding. encoding defaults\n\
3221to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003222handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003223a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3224'xmlcharrefreplace' as well as any other name registered with\n\
3225codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003226
3227static PyObject *
3228string_encode(PyStringObject *self, PyObject *args)
3229{
3230 char *encoding = NULL;
3231 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003232 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003233
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003234 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3235 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003236 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003237 if (v == NULL)
3238 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003239 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3240 PyErr_Format(PyExc_TypeError,
3241 "encoder did not return a string/unicode object "
3242 "(type=%.400s)",
3243 v->ob_type->tp_name);
3244 Py_DECREF(v);
3245 return NULL;
3246 }
3247 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003248
3249 onError:
3250 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003251}
3252
3253
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003254PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003255"S.decode([encoding[,errors]]) -> object\n\
3256\n\
3257Decodes S using the codec registered for encoding. encoding defaults\n\
3258to the default encoding. errors may be given to set a different error\n\
3259handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003260a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3261as well as any other name registerd with codecs.register_error that is\n\
3262able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003263
3264static PyObject *
3265string_decode(PyStringObject *self, PyObject *args)
3266{
3267 char *encoding = NULL;
3268 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003269 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003270
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003271 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3272 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003273 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003274 if (v == NULL)
3275 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003276 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3277 PyErr_Format(PyExc_TypeError,
3278 "decoder did not return a string/unicode object "
3279 "(type=%.400s)",
3280 v->ob_type->tp_name);
3281 Py_DECREF(v);
3282 return NULL;
3283 }
3284 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003285
3286 onError:
3287 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003288}
3289
3290
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003291PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003292"S.expandtabs([tabsize]) -> string\n\
3293\n\
3294Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003295If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003296
3297static PyObject*
3298string_expandtabs(PyStringObject *self, PyObject *args)
3299{
3300 const char *e, *p;
3301 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003302 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003303 PyObject *u;
3304 int tabsize = 8;
3305
3306 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3307 return NULL;
3308
Thomas Wouters7e474022000-07-16 12:04:32 +00003309 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003310 i = j = 0;
3311 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3312 for (p = PyString_AS_STRING(self); p < e; p++)
3313 if (*p == '\t') {
3314 if (tabsize > 0)
3315 j += tabsize - (j % tabsize);
3316 }
3317 else {
3318 j++;
3319 if (*p == '\n' || *p == '\r') {
3320 i += j;
3321 j = 0;
3322 }
3323 }
3324
3325 /* Second pass: create output string and fill it */
3326 u = PyString_FromStringAndSize(NULL, i + j);
3327 if (!u)
3328 return NULL;
3329
3330 j = 0;
3331 q = PyString_AS_STRING(u);
3332
3333 for (p = PyString_AS_STRING(self); p < e; p++)
3334 if (*p == '\t') {
3335 if (tabsize > 0) {
3336 i = tabsize - (j % tabsize);
3337 j += i;
3338 while (i--)
3339 *q++ = ' ';
3340 }
3341 }
3342 else {
3343 j++;
3344 *q++ = *p;
3345 if (*p == '\n' || *p == '\r')
3346 j = 0;
3347 }
3348
3349 return u;
3350}
3351
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003352Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003353pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003354{
3355 PyObject *u;
3356
3357 if (left < 0)
3358 left = 0;
3359 if (right < 0)
3360 right = 0;
3361
Tim Peters8fa5dd02001-09-12 02:18:30 +00003362 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003363 Py_INCREF(self);
3364 return (PyObject *)self;
3365 }
3366
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003367 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003368 left + PyString_GET_SIZE(self) + right);
3369 if (u) {
3370 if (left)
3371 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003372 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003373 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003374 PyString_GET_SIZE(self));
3375 if (right)
3376 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3377 fill, right);
3378 }
3379
3380 return u;
3381}
3382
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003383PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003384"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003385"\n"
3386"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003387"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003388
3389static PyObject *
3390string_ljust(PyStringObject *self, PyObject *args)
3391{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003392 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003393 char fillchar = ' ';
3394
Thomas Wouters4abb3662006-04-19 14:50:15 +00003395 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003396 return NULL;
3397
Tim Peters8fa5dd02001-09-12 02:18:30 +00003398 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399 Py_INCREF(self);
3400 return (PyObject*) self;
3401 }
3402
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003403 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003404}
3405
3406
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003407PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003408"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003409"\n"
3410"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003411"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412
3413static PyObject *
3414string_rjust(PyStringObject *self, PyObject *args)
3415{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003416 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003417 char fillchar = ' ';
3418
Thomas Wouters4abb3662006-04-19 14:50:15 +00003419 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003420 return NULL;
3421
Tim Peters8fa5dd02001-09-12 02:18:30 +00003422 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003423 Py_INCREF(self);
3424 return (PyObject*) self;
3425 }
3426
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003427 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003428}
3429
3430
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003431PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003432"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003433"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003434"Return S centered in a string of length width. Padding is\n"
3435"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003436
3437static PyObject *
3438string_center(PyStringObject *self, PyObject *args)
3439{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003440 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003441 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003442 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003443
Thomas Wouters4abb3662006-04-19 14:50:15 +00003444 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445 return NULL;
3446
Tim Peters8fa5dd02001-09-12 02:18:30 +00003447 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003448 Py_INCREF(self);
3449 return (PyObject*) self;
3450 }
3451
3452 marg = width - PyString_GET_SIZE(self);
3453 left = marg / 2 + (marg & width & 1);
3454
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003455 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456}
3457
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003458PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003459"S.zfill(width) -> string\n"
3460"\n"
3461"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003462"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003463
3464static PyObject *
3465string_zfill(PyStringObject *self, PyObject *args)
3466{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003467 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003468 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003469 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003470 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003471
Thomas Wouters4abb3662006-04-19 14:50:15 +00003472 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003473 return NULL;
3474
3475 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003476 if (PyString_CheckExact(self)) {
3477 Py_INCREF(self);
3478 return (PyObject*) self;
3479 }
3480 else
3481 return PyString_FromStringAndSize(
3482 PyString_AS_STRING(self),
3483 PyString_GET_SIZE(self)
3484 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003485 }
3486
3487 fill = width - PyString_GET_SIZE(self);
3488
3489 s = pad(self, fill, 0, '0');
3490
3491 if (s == NULL)
3492 return NULL;
3493
3494 p = PyString_AS_STRING(s);
3495 if (p[fill] == '+' || p[fill] == '-') {
3496 /* move sign to beginning of string */
3497 p[0] = p[fill];
3498 p[fill] = '0';
3499 }
3500
3501 return (PyObject*) s;
3502}
3503
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003504PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003505"S.isspace() -> bool\n\
3506\n\
3507Return True if all characters in S are whitespace\n\
3508and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509
3510static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003511string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003512{
Fred Drakeba096332000-07-09 07:04:36 +00003513 register const unsigned char *p
3514 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003515 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003516
Guido van Rossum4c08d552000-03-10 22:55:18 +00003517 /* Shortcut for single character strings */
3518 if (PyString_GET_SIZE(self) == 1 &&
3519 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003520 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003521
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003522 /* Special case for empty strings */
3523 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003524 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003525
Guido van Rossum4c08d552000-03-10 22:55:18 +00003526 e = p + PyString_GET_SIZE(self);
3527 for (; p < e; p++) {
3528 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003529 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003530 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003531 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003532}
3533
3534
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003535PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003536"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003537\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003538Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003539and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003540
3541static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003542string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003543{
Fred Drakeba096332000-07-09 07:04:36 +00003544 register const unsigned char *p
3545 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003546 register const unsigned char *e;
3547
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003548 /* Shortcut for single character strings */
3549 if (PyString_GET_SIZE(self) == 1 &&
3550 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003551 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003552
3553 /* Special case for empty strings */
3554 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003555 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003556
3557 e = p + PyString_GET_SIZE(self);
3558 for (; p < e; p++) {
3559 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003560 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003562 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003563}
3564
3565
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003566PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003568\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003569Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003570and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571
3572static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003573string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574{
Fred Drakeba096332000-07-09 07:04:36 +00003575 register const unsigned char *p
3576 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003577 register const unsigned char *e;
3578
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579 /* Shortcut for single character strings */
3580 if (PyString_GET_SIZE(self) == 1 &&
3581 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003583
3584 /* Special case for empty strings */
3585 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587
3588 e = p + PyString_GET_SIZE(self);
3589 for (; p < e; p++) {
3590 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003591 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594}
3595
3596
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003597PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003598"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003599\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003600Return True if all characters in S are digits\n\
3601and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003602
3603static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003604string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003605{
Fred Drakeba096332000-07-09 07:04:36 +00003606 register const unsigned char *p
3607 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003608 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609
Guido van Rossum4c08d552000-03-10 22:55:18 +00003610 /* Shortcut for single character strings */
3611 if (PyString_GET_SIZE(self) == 1 &&
3612 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003614
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003615 /* Special case for empty strings */
3616 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003618
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619 e = p + PyString_GET_SIZE(self);
3620 for (; p < e; p++) {
3621 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003622 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625}
3626
3627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003628PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003629"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003632at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633
3634static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003635string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003636{
Fred Drakeba096332000-07-09 07:04:36 +00003637 register const unsigned char *p
3638 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003639 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640 int cased;
3641
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642 /* Shortcut for single character strings */
3643 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003646 /* Special case for empty strings */
3647 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003649
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650 e = p + PyString_GET_SIZE(self);
3651 cased = 0;
3652 for (; p < e; p++) {
3653 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003654 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655 else if (!cased && islower(*p))
3656 cased = 1;
3657 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003658 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659}
3660
3661
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003662PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003663"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003665Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003666at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667
3668static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003669string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670{
Fred Drakeba096332000-07-09 07:04:36 +00003671 register const unsigned char *p
3672 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003673 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003674 int cased;
3675
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 /* Shortcut for single character strings */
3677 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003678 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003679
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003680 /* Special case for empty strings */
3681 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003682 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003683
Guido van Rossum4c08d552000-03-10 22:55:18 +00003684 e = p + PyString_GET_SIZE(self);
3685 cased = 0;
3686 for (; p < e; p++) {
3687 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689 else if (!cased && isupper(*p))
3690 cased = 1;
3691 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693}
3694
3695
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003696PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003697"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003699Return True if S is a titlecased string and there is at least one\n\
3700character in S, i.e. uppercase characters may only follow uncased\n\
3701characters and lowercase characters only cased ones. Return False\n\
3702otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703
3704static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003705string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706{
Fred Drakeba096332000-07-09 07:04:36 +00003707 register const unsigned char *p
3708 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003709 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710 int cased, previous_is_cased;
3711
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 /* Shortcut for single character strings */
3713 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003714 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003716 /* Special case for empty strings */
3717 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003719
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 e = p + PyString_GET_SIZE(self);
3721 cased = 0;
3722 previous_is_cased = 0;
3723 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003724 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725
3726 if (isupper(ch)) {
3727 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 previous_is_cased = 1;
3730 cased = 1;
3731 }
3732 else if (islower(ch)) {
3733 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003734 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735 previous_is_cased = 1;
3736 cased = 1;
3737 }
3738 else
3739 previous_is_cased = 0;
3740 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003741 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742}
3743
3744
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003745PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003746"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747\n\
3748Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003749Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003750is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752static PyObject*
3753string_splitlines(PyStringObject *self, PyObject *args)
3754{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003755 register Py_ssize_t i;
3756 register Py_ssize_t j;
3757 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003758 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003759 PyObject *list;
3760 PyObject *str;
3761 char *data;
3762
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003763 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764 return NULL;
3765
3766 data = PyString_AS_STRING(self);
3767 len = PyString_GET_SIZE(self);
3768
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003769 /* This does not use the preallocated list because splitlines is
3770 usually run with hundreds of newlines. The overhead of
3771 switching between PyList_SET_ITEM and append causes about a
3772 2-3% slowdown for that common case. A smarter implementation
3773 could move the if check out, so the SET_ITEMs are done first
3774 and the appends only done when the prealloc buffer is full.
3775 That's too much work for little gain.*/
3776
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 list = PyList_New(0);
3778 if (!list)
3779 goto onError;
3780
3781 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003782 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003783
Guido van Rossum4c08d552000-03-10 22:55:18 +00003784 /* Find a line and append it */
3785 while (i < len && data[i] != '\n' && data[i] != '\r')
3786 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787
3788 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003789 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003790 if (i < len) {
3791 if (data[i] == '\r' && i + 1 < len &&
3792 data[i+1] == '\n')
3793 i += 2;
3794 else
3795 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003796 if (keepends)
3797 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003798 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003799 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 j = i;
3801 }
3802 if (j < len) {
3803 SPLIT_APPEND(data, j, len);
3804 }
3805
3806 return list;
3807
3808 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003809 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 return NULL;
3811}
3812
3813#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003814#undef SPLIT_ADD
3815#undef MAX_PREALLOC
3816#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003818static PyObject *
3819string_getnewargs(PyStringObject *v)
3820{
3821 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3822}
3823
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003824
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003825static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003826string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003827 /* Counterparts of the obsolete stropmodule functions; except
3828 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003829 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3830 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003831 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003832 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3833 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003834 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3835 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3836 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3837 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3838 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3839 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3840 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003841 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3842 capitalize__doc__},
3843 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3844 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3845 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003846 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003847 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3848 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3849 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3850 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3851 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3852 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3853 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003854 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3855 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003856 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3857 startswith__doc__},
3858 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3859 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3860 swapcase__doc__},
3861 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3862 translate__doc__},
3863 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3864 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3865 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3866 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3867 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3868 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3869 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3870 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3871 expandtabs__doc__},
3872 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3873 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003874 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003875 {NULL, NULL} /* sentinel */
3876};
3877
Jeremy Hylton938ace62002-07-17 16:30:39 +00003878static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003879str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3880
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003881static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003882string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003883{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003884 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003885 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003886
Guido van Rossumae960af2001-08-30 03:11:59 +00003887 if (type != &PyString_Type)
3888 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003889 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3890 return NULL;
3891 if (x == NULL)
3892 return PyString_FromString("");
3893 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003894}
3895
Guido van Rossumae960af2001-08-30 03:11:59 +00003896static PyObject *
3897str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3898{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003899 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003900 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003901
3902 assert(PyType_IsSubtype(type, &PyString_Type));
3903 tmp = string_new(&PyString_Type, args, kwds);
3904 if (tmp == NULL)
3905 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003906 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003907 n = PyString_GET_SIZE(tmp);
3908 pnew = type->tp_alloc(type, n);
3909 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003910 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003911 ((PyStringObject *)pnew)->ob_shash =
3912 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003913 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003914 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003915 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003916 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003917}
3918
Guido van Rossumcacfc072002-05-24 19:01:59 +00003919static PyObject *
3920basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3921{
3922 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003923 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003924 return NULL;
3925}
3926
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003927static PyObject *
3928string_mod(PyObject *v, PyObject *w)
3929{
3930 if (!PyString_Check(v)) {
3931 Py_INCREF(Py_NotImplemented);
3932 return Py_NotImplemented;
3933 }
3934 return PyString_Format(v, w);
3935}
3936
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003937PyDoc_STRVAR(basestring_doc,
3938"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003939
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003940static PyNumberMethods string_as_number = {
3941 0, /*nb_add*/
3942 0, /*nb_subtract*/
3943 0, /*nb_multiply*/
3944 0, /*nb_divide*/
3945 string_mod, /*nb_remainder*/
3946};
3947
3948
Guido van Rossumcacfc072002-05-24 19:01:59 +00003949PyTypeObject PyBaseString_Type = {
3950 PyObject_HEAD_INIT(&PyType_Type)
3951 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003952 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003953 0,
3954 0,
3955 0, /* tp_dealloc */
3956 0, /* tp_print */
3957 0, /* tp_getattr */
3958 0, /* tp_setattr */
3959 0, /* tp_compare */
3960 0, /* tp_repr */
3961 0, /* tp_as_number */
3962 0, /* tp_as_sequence */
3963 0, /* tp_as_mapping */
3964 0, /* tp_hash */
3965 0, /* tp_call */
3966 0, /* tp_str */
3967 0, /* tp_getattro */
3968 0, /* tp_setattro */
3969 0, /* tp_as_buffer */
3970 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3971 basestring_doc, /* tp_doc */
3972 0, /* tp_traverse */
3973 0, /* tp_clear */
3974 0, /* tp_richcompare */
3975 0, /* tp_weaklistoffset */
3976 0, /* tp_iter */
3977 0, /* tp_iternext */
3978 0, /* tp_methods */
3979 0, /* tp_members */
3980 0, /* tp_getset */
3981 &PyBaseObject_Type, /* tp_base */
3982 0, /* tp_dict */
3983 0, /* tp_descr_get */
3984 0, /* tp_descr_set */
3985 0, /* tp_dictoffset */
3986 0, /* tp_init */
3987 0, /* tp_alloc */
3988 basestring_new, /* tp_new */
3989 0, /* tp_free */
3990};
3991
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003992PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003993"str(object) -> string\n\
3994\n\
3995Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003996If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003997
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003998PyTypeObject PyString_Type = {
3999 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004000 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004001 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004002 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004003 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004004 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004005 (printfunc)string_print, /* tp_print */
4006 0, /* tp_getattr */
4007 0, /* tp_setattr */
4008 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004009 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004010 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004012 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004013 (hashfunc)string_hash, /* tp_hash */
4014 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004015 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004016 PyObject_GenericGetAttr, /* tp_getattro */
4017 0, /* tp_setattro */
4018 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004019 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004020 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004021 string_doc, /* tp_doc */
4022 0, /* tp_traverse */
4023 0, /* tp_clear */
4024 (richcmpfunc)string_richcompare, /* tp_richcompare */
4025 0, /* tp_weaklistoffset */
4026 0, /* tp_iter */
4027 0, /* tp_iternext */
4028 string_methods, /* tp_methods */
4029 0, /* tp_members */
4030 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004031 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004032 0, /* tp_dict */
4033 0, /* tp_descr_get */
4034 0, /* tp_descr_set */
4035 0, /* tp_dictoffset */
4036 0, /* tp_init */
4037 0, /* tp_alloc */
4038 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004039 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004040};
4041
4042void
Fred Drakeba096332000-07-09 07:04:36 +00004043PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004044{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004045 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004046 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004047 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004048 if (w == NULL || !PyString_Check(*pv)) {
4049 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004050 *pv = NULL;
4051 return;
4052 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004053 v = string_concat((PyStringObject *) *pv, w);
4054 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004055 *pv = v;
4056}
4057
Guido van Rossum013142a1994-08-30 08:19:36 +00004058void
Fred Drakeba096332000-07-09 07:04:36 +00004059PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004060{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004061 PyString_Concat(pv, w);
4062 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004063}
4064
4065
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004066/* The following function breaks the notion that strings are immutable:
4067 it changes the size of a string. We get away with this only if there
4068 is only one module referencing the object. You can also think of it
4069 as creating a new string object and destroying the old one, only
4070 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004071 already be known to some other part of the code...
4072 Note that if there's not enough memory to resize the string, the original
4073 string object at *pv is deallocated, *pv is set to NULL, an "out of
4074 memory" exception is set, and -1 is returned. Else (on success) 0 is
4075 returned, and the value in *pv may or may not be the same as on input.
4076 As always, an extra byte is allocated for a trailing \0 byte (newsize
4077 does *not* include that), and a trailing \0 byte is stored.
4078*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004079
4080int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004081_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004082{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 register PyObject *v;
4084 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004085 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004086 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4087 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004088 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004089 Py_DECREF(v);
4090 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004091 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004093 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004094 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004095 _Py_ForgetReference(v);
4096 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004097 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004098 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004099 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004100 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004101 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004102 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004103 _Py_NewReference(*pv);
4104 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004105 sv->ob_size = newsize;
4106 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004107 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004108 return 0;
4109}
Guido van Rossume5372401993-03-16 12:15:04 +00004110
4111/* Helpers for formatstring */
4112
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004113Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004114getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004115{
Thomas Wouters977485d2006-02-16 15:59:12 +00004116 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004117 if (argidx < arglen) {
4118 (*p_argidx)++;
4119 if (arglen < 0)
4120 return args;
4121 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004122 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004123 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004124 PyErr_SetString(PyExc_TypeError,
4125 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004126 return NULL;
4127}
4128
/* Format flag bits, one per flag character that may follow '%':
 *	'-'	F_LJUST
 *	'+'	F_SIGN
 *	' '	F_BLANK
 *	'#'	F_ALT
 *	'0'	F_ZERO
 * The values are distinct powers of two so they can be OR-ed together.
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
4141
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004142Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004143formatfloat(char *buf, size_t buflen, int flags,
4144 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004145{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004146 /* fmt = '%#.' + `prec` + `type`
4147 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004148 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004149 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004150 x = PyFloat_AsDouble(v);
4151 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004152 PyErr_Format(PyExc_TypeError, "float argument required, "
4153 "not %.200s", v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004154 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004155 }
Guido van Rossume5372401993-03-16 12:15:04 +00004156 if (prec < 0)
4157 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004158 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4159 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004160 /* Worst case length calc to ensure no buffer overrun:
4161
4162 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004163 fmt = %#.<prec>g
4164 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004165 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004166 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004167
4168 'f' formats:
4169 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4170 len = 1 + 50 + 1 + prec = 52 + prec
4171
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004172 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004173 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004174
4175 */
4176 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4177 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004178 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004179 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004180 return -1;
4181 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004182 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4183 (flags&F_ALT) ? "#" : "",
4184 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004185 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004186 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004187}
4188
Tim Peters38fd5b62000-09-21 05:43:11 +00004189/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4190 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4191 * Python's regular ints.
4192 * Return value: a new PyString*, or NULL if error.
4193 * . *pbuf is set to point into it,
4194 * *plen set to the # of chars following that.
4195 * Caller must decref it when done using pbuf.
4196 * The string starting at *pbuf is of the form
4197 * "-"? ("0x" | "0X")? digit+
4198 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004199 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004200 * There will be at least prec digits, zero-filled on the left if
4201 * necessary to get that many.
4202 * val object to be converted
4203 * flags bitmask of format flags; only F_ALT is looked at
4204 * prec minimum number of digits; 0-fill on left if needed
4205 * type a character in [duoxX]; u acts the same as d
4206 *
4207 * CAUTION: o, x and X conversions on regular ints can never
4208 * produce a '-' sign, but can for Python's unbounded ints.
4209 */
4210PyObject*
4211_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4212 char **pbuf, int *plen)
4213{
4214 PyObject *result = NULL;
4215 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004216 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004217 int sign; /* 1 if '-', else 0 */
4218 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004219 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004220 int numdigits; /* len == numnondigits + numdigits */
4221 int numnondigits = 0;
4222
4223 switch (type) {
4224 case 'd':
4225 case 'u':
4226 result = val->ob_type->tp_str(val);
4227 break;
4228 case 'o':
4229 result = val->ob_type->tp_as_number->nb_oct(val);
4230 break;
4231 case 'x':
4232 case 'X':
4233 numnondigits = 2;
4234 result = val->ob_type->tp_as_number->nb_hex(val);
4235 break;
4236 default:
4237 assert(!"'type' not in [duoxX]");
4238 }
4239 if (!result)
4240 return NULL;
4241
Neal Norwitz56423e52006-08-13 18:11:08 +00004242 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004243 if (!buf) {
4244 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004245 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004246 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004247
Tim Peters38fd5b62000-09-21 05:43:11 +00004248 /* To modify the string in-place, there can only be one reference. */
4249 if (result->ob_refcnt != 1) {
4250 PyErr_BadInternalCall();
4251 return NULL;
4252 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004253 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004254 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004255 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4256 return NULL;
4257 }
4258 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004259 if (buf[len-1] == 'L') {
4260 --len;
4261 buf[len] = '\0';
4262 }
4263 sign = buf[0] == '-';
4264 numnondigits += sign;
4265 numdigits = len - numnondigits;
4266 assert(numdigits > 0);
4267
Tim Petersfff53252001-04-12 18:38:48 +00004268 /* Get rid of base marker unless F_ALT */
4269 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004270 /* Need to skip 0x, 0X or 0. */
4271 int skipped = 0;
4272 switch (type) {
4273 case 'o':
4274 assert(buf[sign] == '0');
4275 /* If 0 is only digit, leave it alone. */
4276 if (numdigits > 1) {
4277 skipped = 1;
4278 --numdigits;
4279 }
4280 break;
4281 case 'x':
4282 case 'X':
4283 assert(buf[sign] == '0');
4284 assert(buf[sign + 1] == 'x');
4285 skipped = 2;
4286 numnondigits -= 2;
4287 break;
4288 }
4289 if (skipped) {
4290 buf += skipped;
4291 len -= skipped;
4292 if (sign)
4293 buf[0] = '-';
4294 }
4295 assert(len == numnondigits + numdigits);
4296 assert(numdigits > 0);
4297 }
4298
4299 /* Fill with leading zeroes to meet minimum width. */
4300 if (prec > numdigits) {
4301 PyObject *r1 = PyString_FromStringAndSize(NULL,
4302 numnondigits + prec);
4303 char *b1;
4304 if (!r1) {
4305 Py_DECREF(result);
4306 return NULL;
4307 }
4308 b1 = PyString_AS_STRING(r1);
4309 for (i = 0; i < numnondigits; ++i)
4310 *b1++ = *buf++;
4311 for (i = 0; i < prec - numdigits; i++)
4312 *b1++ = '0';
4313 for (i = 0; i < numdigits; i++)
4314 *b1++ = *buf++;
4315 *b1 = '\0';
4316 Py_DECREF(result);
4317 result = r1;
4318 buf = PyString_AS_STRING(result);
4319 len = numnondigits + prec;
4320 }
4321
4322 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004323 if (type == 'X') {
4324 /* Need to convert all lower case letters to upper case.
4325 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004326 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004327 if (buf[i] >= 'a' && buf[i] <= 'x')
4328 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004329 }
4330 *pbuf = buf;
4331 *plen = len;
4332 return result;
4333}
4334
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004335Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004336formatint(char *buf, size_t buflen, int flags,
4337 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004338{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004339 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004340 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4341 + 1 + 1 = 24 */
4342 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004343 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004344 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004345
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004346 x = PyInt_AsLong(v);
4347 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004348 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4349 v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004350 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004351 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004352 if (x < 0 && type == 'u') {
4353 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004354 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004355 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4356 sign = "-";
4357 else
4358 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004359 if (prec < 0)
4360 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004361
4362 if ((flags & F_ALT) &&
4363 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004364 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004365 * of issues that cause pain:
4366 * - when 0 is being converted, the C standard leaves off
4367 * the '0x' or '0X', which is inconsistent with other
4368 * %#x/%#X conversions and inconsistent with Python's
4369 * hex() function
4370 * - there are platforms that violate the standard and
4371 * convert 0 with the '0x' or '0X'
4372 * (Metrowerks, Compaq Tru64)
4373 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004374 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004375 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004376 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004377 * We can achieve the desired consistency by inserting our
4378 * own '0x' or '0X' prefix, and substituting %x/%X in place
4379 * of %#x/%#X.
4380 *
4381 * Note that this is the same approach as used in
4382 * formatint() in unicodeobject.c
4383 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004384 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4385 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004386 }
4387 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004388 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4389 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004390 prec, type);
4391 }
4392
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004393 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4394 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004395 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004396 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004397 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004398 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004399 return -1;
4400 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004401 if (sign[0])
4402 PyOS_snprintf(buf, buflen, fmt, -x);
4403 else
4404 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004405 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004406}
4407
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004408Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004409formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004410{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004411 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004412 if (PyString_Check(v)) {
4413 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004414 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004415 }
4416 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004417 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004418 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004419 }
4420 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004421 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004422}
4423
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004424/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4425
4426 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4427 chars are formatted. XXX This is a magic number. Each formatting
4428 routine does bounds checking to ensure no overflow, but a better
4429 solution may be to malloc a buffer of appropriate size for each
4430 format. For now, the current solution is sufficient.
4431*/
4432#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004433
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004434PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004435PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004436{
4437 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004438 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004439 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004440 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004441 PyObject *result, *orig_args;
4442#ifdef Py_USING_UNICODE
4443 PyObject *v, *w;
4444#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004445 PyObject *dict = NULL;
4446 if (format == NULL || !PyString_Check(format) || args == NULL) {
4447 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004448 return NULL;
4449 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004450 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004451 fmt = PyString_AS_STRING(format);
4452 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004453 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004454 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004455 if (result == NULL)
4456 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004457 res = PyString_AsString(result);
4458 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004459 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004460 argidx = 0;
4461 }
4462 else {
4463 arglen = -1;
4464 argidx = -2;
4465 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004466 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4467 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004468 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004469 while (--fmtcnt >= 0) {
4470 if (*fmt != '%') {
4471 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004472 rescnt = fmtcnt + 100;
4473 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004474 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004475 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004476 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004477 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004478 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004479 }
4480 *res++ = *fmt++;
4481 }
4482 else {
4483 /* Got a format specifier */
4484 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004485 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004486 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004487 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004488 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004489 PyObject *v = NULL;
4490 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004491 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004492 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004493 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004494 char formatbuf[FORMATBUFLEN];
4495 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004496#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004497 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004498 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004499#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004500
Guido van Rossumda9c2711996-12-05 21:58:58 +00004501 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004502 if (*fmt == '(') {
4503 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004504 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004505 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004506 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004507
4508 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004509 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004510 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004511 goto error;
4512 }
4513 ++fmt;
4514 --fmtcnt;
4515 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004516 /* Skip over balanced parentheses */
4517 while (pcount > 0 && --fmtcnt >= 0) {
4518 if (*fmt == ')')
4519 --pcount;
4520 else if (*fmt == '(')
4521 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004522 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004523 }
4524 keylen = fmt - keystart - 1;
4525 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004526 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004527 "incomplete format key");
4528 goto error;
4529 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004530 key = PyString_FromStringAndSize(keystart,
4531 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004532 if (key == NULL)
4533 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004534 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004535 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004536 args_owned = 0;
4537 }
4538 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004539 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004540 if (args == NULL) {
4541 goto error;
4542 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004543 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004544 arglen = -1;
4545 argidx = -2;
4546 }
Guido van Rossume5372401993-03-16 12:15:04 +00004547 while (--fmtcnt >= 0) {
4548 switch (c = *fmt++) {
4549 case '-': flags |= F_LJUST; continue;
4550 case '+': flags |= F_SIGN; continue;
4551 case ' ': flags |= F_BLANK; continue;
4552 case '#': flags |= F_ALT; continue;
4553 case '0': flags |= F_ZERO; continue;
4554 }
4555 break;
4556 }
4557 if (c == '*') {
4558 v = getnextarg(args, arglen, &argidx);
4559 if (v == NULL)
4560 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004561 if (!PyInt_Check(v)) {
4562 PyErr_SetString(PyExc_TypeError,
4563 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004564 goto error;
4565 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004566 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004567 if (width < 0) {
4568 flags |= F_LJUST;
4569 width = -width;
4570 }
Guido van Rossume5372401993-03-16 12:15:04 +00004571 if (--fmtcnt >= 0)
4572 c = *fmt++;
4573 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004574 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004575 width = c - '0';
4576 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004577 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004578 if (!isdigit(c))
4579 break;
4580 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004581 PyErr_SetString(
4582 PyExc_ValueError,
4583 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004584 goto error;
4585 }
4586 width = width*10 + (c - '0');
4587 }
4588 }
4589 if (c == '.') {
4590 prec = 0;
4591 if (--fmtcnt >= 0)
4592 c = *fmt++;
4593 if (c == '*') {
4594 v = getnextarg(args, arglen, &argidx);
4595 if (v == NULL)
4596 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004597 if (!PyInt_Check(v)) {
4598 PyErr_SetString(
4599 PyExc_TypeError,
4600 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004601 goto error;
4602 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004603 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004604 if (prec < 0)
4605 prec = 0;
4606 if (--fmtcnt >= 0)
4607 c = *fmt++;
4608 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004609 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004610 prec = c - '0';
4611 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004612 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004613 if (!isdigit(c))
4614 break;
4615 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004616 PyErr_SetString(
4617 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004618 "prec too big");
4619 goto error;
4620 }
4621 prec = prec*10 + (c - '0');
4622 }
4623 }
4624 } /* prec */
4625 if (fmtcnt >= 0) {
4626 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004627 if (--fmtcnt >= 0)
4628 c = *fmt++;
4629 }
4630 }
4631 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004632 PyErr_SetString(PyExc_ValueError,
4633 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004634 goto error;
4635 }
4636 if (c != '%') {
4637 v = getnextarg(args, arglen, &argidx);
4638 if (v == NULL)
4639 goto error;
4640 }
4641 sign = 0;
4642 fill = ' ';
4643 switch (c) {
4644 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004645 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004646 len = 1;
4647 break;
4648 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004649#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004650 if (PyUnicode_Check(v)) {
4651 fmt = fmt_start;
4652 argidx = argidx_start;
4653 goto unicode;
4654 }
Georg Brandld45014b2005-10-01 17:06:00 +00004655#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004656 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004657#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004658 if (temp != NULL && PyUnicode_Check(temp)) {
4659 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004660 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004661 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004662 goto unicode;
4663 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004664#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004665 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004666 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004667 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004668 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004669 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004670 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004671 if (!PyString_Check(temp)) {
4672 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004673 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004674 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004675 goto error;
4676 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004677 pbuf = PyString_AS_STRING(temp);
4678 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004679 if (prec >= 0 && len > prec)
4680 len = prec;
4681 break;
4682 case 'i':
4683 case 'd':
4684 case 'u':
4685 case 'o':
4686 case 'x':
4687 case 'X':
4688 if (c == 'i')
4689 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004690 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004691 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004692 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004693 prec, c, &pbuf, &ilen);
4694 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004695 if (!temp)
4696 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004697 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004698 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004699 else {
4700 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004701 len = formatint(pbuf,
4702 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004703 flags, prec, c, v);
4704 if (len < 0)
4705 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004706 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004707 }
4708 if (flags & F_ZERO)
4709 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004710 break;
4711 case 'e':
4712 case 'E':
4713 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004714 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004715 case 'g':
4716 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004717 if (c == 'F')
4718 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004719 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004720 len = formatfloat(pbuf, sizeof(formatbuf),
4721 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004722 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004723 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004724 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004725 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004726 fill = '0';
4727 break;
4728 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004729#ifdef Py_USING_UNICODE
4730 if (PyUnicode_Check(v)) {
4731 fmt = fmt_start;
4732 argidx = argidx_start;
4733 goto unicode;
4734 }
4735#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004736 pbuf = formatbuf;
4737 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004738 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004739 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004740 break;
4741 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004742 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004743 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004744 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004745 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004746 (Py_ssize_t)(fmt - 1 -
4747 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004748 goto error;
4749 }
4750 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004751 if (*pbuf == '-' || *pbuf == '+') {
4752 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004753 len--;
4754 }
4755 else if (flags & F_SIGN)
4756 sign = '+';
4757 else if (flags & F_BLANK)
4758 sign = ' ';
4759 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004760 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004761 }
4762 if (width < len)
4763 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004764 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004765 reslen -= rescnt;
4766 rescnt = width + fmtcnt + 100;
4767 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004768 if (reslen < 0) {
4769 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004770 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004771 return PyErr_NoMemory();
4772 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004773 if (_PyString_Resize(&result, reslen) < 0) {
4774 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004775 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004776 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004777 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004778 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004779 }
4780 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004781 if (fill != ' ')
4782 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004783 rescnt--;
4784 if (width > len)
4785 width--;
4786 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004787 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4788 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004789 assert(pbuf[1] == c);
4790 if (fill != ' ') {
4791 *res++ = *pbuf++;
4792 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004793 }
Tim Petersfff53252001-04-12 18:38:48 +00004794 rescnt -= 2;
4795 width -= 2;
4796 if (width < 0)
4797 width = 0;
4798 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004799 }
4800 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004801 do {
4802 --rescnt;
4803 *res++ = fill;
4804 } while (--width > len);
4805 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004806 if (fill == ' ') {
4807 if (sign)
4808 *res++ = sign;
4809 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004810 (c == 'x' || c == 'X')) {
4811 assert(pbuf[0] == '0');
4812 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004813 *res++ = *pbuf++;
4814 *res++ = *pbuf++;
4815 }
4816 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004817 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004818 res += len;
4819 rescnt -= len;
4820 while (--width >= len) {
4821 --rescnt;
4822 *res++ = ' ';
4823 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004824 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004825 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004826 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004827 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004828 goto error;
4829 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004830 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004831 } /* '%' */
4832 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004833 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004834 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004835 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004836 goto error;
4837 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004838 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004839 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004840 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004841 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004842 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004843
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004844#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004845 unicode:
4846 if (args_owned) {
4847 Py_DECREF(args);
4848 args_owned = 0;
4849 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004850 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004851 if (PyTuple_Check(orig_args) && argidx > 0) {
4852 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004853 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004854 v = PyTuple_New(n);
4855 if (v == NULL)
4856 goto error;
4857 while (--n >= 0) {
4858 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4859 Py_INCREF(w);
4860 PyTuple_SET_ITEM(v, n, w);
4861 }
4862 args = v;
4863 } else {
4864 Py_INCREF(orig_args);
4865 args = orig_args;
4866 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004867 args_owned = 1;
4868 /* Take what we have of the result and let the Unicode formatting
4869 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004870 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004871 if (_PyString_Resize(&result, rescnt))
4872 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004873 fmtcnt = PyString_GET_SIZE(format) - \
4874 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004875 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4876 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004877 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004878 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004879 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004880 if (v == NULL)
4881 goto error;
4882 /* Paste what we have (result) to what the Unicode formatting
4883 function returned (v) and return the result (or error) */
4884 w = PyUnicode_Concat(result, v);
4885 Py_DECREF(result);
4886 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004887 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004888 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004889#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004890
Guido van Rossume5372401993-03-16 12:15:04 +00004891 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004892 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004893 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004894 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004895 }
Guido van Rossume5372401993-03-16 12:15:04 +00004896 return NULL;
4897}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004898
Guido van Rossum2a61e741997-01-18 07:55:05 +00004899void
Fred Drakeba096332000-07-09 07:04:36 +00004900PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004901{
4902 register PyStringObject *s = (PyStringObject *)(*p);
4903 PyObject *t;
4904 if (s == NULL || !PyString_Check(s))
4905 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004906 /* If it's a string subclass, we don't really know what putting
4907 it in the interned dict might do. */
4908 if (!PyString_CheckExact(s))
4909 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004910 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004911 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004912 if (interned == NULL) {
4913 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004914 if (interned == NULL) {
4915 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004916 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004917 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004918 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004919 t = PyDict_GetItem(interned, (PyObject *)s);
4920 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004921 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004922 Py_DECREF(*p);
4923 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004924 return;
4925 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004926
Armin Rigo79f7ad22004-08-07 19:27:39 +00004927 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004928 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004929 return;
4930 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004931 /* The two references in interned are not counted by refcnt.
4932 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004933 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004934 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004935}
4936
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004937void
4938PyString_InternImmortal(PyObject **p)
4939{
4940 PyString_InternInPlace(p);
4941 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4942 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4943 Py_INCREF(*p);
4944 }
4945}
4946
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947
4948PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004949PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004950{
4951 PyObject *s = PyString_FromString(cp);
4952 if (s == NULL)
4953 return NULL;
4954 PyString_InternInPlace(&s);
4955 return s;
4956}
4957
Guido van Rossum8cf04761997-08-02 02:57:45 +00004958void
Fred Drakeba096332000-07-09 07:04:36 +00004959PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004960{
4961 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004962 for (i = 0; i < UCHAR_MAX + 1; i++) {
4963 Py_XDECREF(characters[i]);
4964 characters[i] = NULL;
4965 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004966 Py_XDECREF(nullstring);
4967 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004968}
Barry Warsawa903ad982001-02-23 16:40:48 +00004969
Barry Warsawa903ad982001-02-23 16:40:48 +00004970void _Py_ReleaseInternedStrings(void)
4971{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004972 PyObject *keys;
4973 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004974 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00004975 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004976
4977 if (interned == NULL || !PyDict_Check(interned))
4978 return;
4979 keys = PyDict_Keys(interned);
4980 if (keys == NULL || !PyList_Check(keys)) {
4981 PyErr_Clear();
4982 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004983 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004984
4985 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4986 detector, interned strings are not forcibly deallocated; rather, we
4987 give them their stolen references back, and then clear and DECREF
4988 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004989
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004990 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00004991 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4992 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004993 for (i = 0; i < n; i++) {
4994 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4995 switch (s->ob_sstate) {
4996 case SSTATE_NOT_INTERNED:
4997 /* XXX Shouldn't happen */
4998 break;
4999 case SSTATE_INTERNED_IMMORTAL:
5000 s->ob_refcnt += 1;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005001 immortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005002 break;
5003 case SSTATE_INTERNED_MORTAL:
5004 s->ob_refcnt += 2;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005005 mortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005006 break;
5007 default:
5008 Py_FatalError("Inconsistent interned string state.");
5009 }
5010 s->ob_sstate = SSTATE_NOT_INTERNED;
5011 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005012 fprintf(stderr, "total size of all interned strings: "
5013 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5014 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005015 Py_DECREF(keys);
5016 PyDict_Clear(interned);
5017 Py_DECREF(interned);
5018 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005019}