blob: 4c2faf45437da314dcd30cce1892c7c3121e4866 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Allocation statistics: bumped each time the shared empty string
   (null_strings) or a cached one-character string (one_strings) is handed
   out instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by the byte value
   (`*str & UCHAR_MAX'); an entry stays NULL until that string is first
   created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until it is first created. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
/* Configuration macros mapping generic STRINGLIB_* names onto the 8-bit
   string type.  NOTE(review): presumably consumed by the stringlib/*.h
   headers included right after this group -- confirm against those
   headers. */

/* element type of the character data */
#define STRINGLIB_CHAR char

/* comparison of two character buffers */
#define STRINGLIB_CMP memcmp
/* length of, constructor for, and buffer access to a string object */
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING

/* the shared empty string object */
#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000807#ifdef __VMS
808 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
809#else
810 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
811#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000812 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814
Thomas Wouters7e474022000-07-16 12:04:32 +0000815 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000817 if (memchr(op->ob_sval, '\'', op->ob_size) &&
818 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '"';
820
821 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822 for (i = 0; i < op->ob_size; i++) {
823 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000825 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000826 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000827 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000828 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000829 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000830 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000831 fprintf(fp, "\\r");
832 else if (c < ' ' || c >= 0x7f)
833 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000834 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000835 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000838 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000839}
840
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841PyObject *
842PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000844 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000845 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000846 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000847 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000848 PyErr_SetString(PyExc_OverflowError,
849 "string is too large to make repr");
850 }
851 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000853 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
855 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000856 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 register char c;
858 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000859 int quote;
860
Thomas Wouters7e474022000-07-16 12:04:32 +0000861 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000862 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000863 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000865 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000866 quote = '"';
867
Tim Peters9161c8b2001-12-03 01:55:38 +0000868 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000869 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000871 /* There's at least enough room for a hex escape
872 and a closing quote. */
873 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000875 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000877 else if (c == '\t')
878 *p++ = '\\', *p++ = 't';
879 else if (c == '\n')
880 *p++ = '\\', *p++ = 'n';
881 else if (c == '\r')
882 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000883 else if (c < ' ' || c >= 0x7f) {
884 /* For performance, we don't want to call
885 PyOS_snprintf here (extra layers of
886 function call). */
887 sprintf(p, "\\x%02x", c & 0xff);
888 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000889 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000890 else
891 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000893 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000894 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000896 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000897 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000898 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900}
901
Guido van Rossum189f1df2001-05-01 16:51:53 +0000902static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000903string_repr(PyObject *op)
904{
905 return PyString_Repr(op, 1);
906}
907
908static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000909string_str(PyObject *s)
910{
Tim Petersc9933152001-10-16 20:18:24 +0000911 assert(PyString_Check(s));
912 if (PyString_CheckExact(s)) {
913 Py_INCREF(s);
914 return s;
915 }
916 else {
917 /* Subtype -- return genuine string with the same value. */
918 PyStringObject *t = (PyStringObject *) s;
919 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
920 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921}
922
Martin v. Löwis18e16552006-02-15 17:27:45 +0000923static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000924string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925{
926 return a->ob_size;
927}
928
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000930string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000931{
Andrew Dalke598710c2006-05-25 18:18:39 +0000932 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000933 register PyStringObject *op;
934 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000935#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000936 if (PyUnicode_Check(bb))
937 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000938#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000939 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000940 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000941 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942 return NULL;
943 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000946 if ((a->ob_size == 0 || b->ob_size == 0) &&
947 PyString_CheckExact(a) && PyString_CheckExact(b)) {
948 if (a->ob_size == 0) {
949 Py_INCREF(bb);
950 return bb;
951 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
955 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000956 if (size < 0) {
957 PyErr_SetString(PyExc_OverflowError,
958 "strings are too large to concat");
959 return NULL;
960 }
961
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000962 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000963 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000969 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000971 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973#undef b
974}
975
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000977string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000979 register Py_ssize_t i;
980 register Py_ssize_t j;
981 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000983 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984 if (n < 0)
985 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000986 /* watch out for overflows: the size can overflow int,
987 * and the # of bytes needed can overflow size_t
988 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000990 if (n && size / n != a->ob_size) {
991 PyErr_SetString(PyExc_OverflowError,
992 "repeated string is too long");
993 return NULL;
994 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 Py_INCREF(a);
997 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 }
Tim Peterse7c05322004-06-27 17:24:49 +0000999 nbytes = (size_t)size;
1000 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001001 PyErr_SetString(PyExc_OverflowError,
1002 "repeated string is too long");
1003 return NULL;
1004 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001006 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001007 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001009 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001010 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001011 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001012 op->ob_sval[size] = '\0';
1013 if (a->ob_size == 1 && n > 0) {
1014 memset(op->ob_sval, a->ob_sval[0] , n);
1015 return (PyObject *) op;
1016 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001017 i = 0;
1018 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001019 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001020 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001021 }
1022 while (i < size) {
1023 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001024 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001025 i += j;
1026 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028}
1029
1030/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1031
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001032static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001033string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001035 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036{
1037 if (i < 0)
1038 i = 0;
1039 if (j < 0)
1040 j = 0; /* Avoid signed/unsigned bug in next line */
1041 if (j > a->ob_size)
1042 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001043 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001045 Py_INCREF(a);
1046 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047 }
1048 if (j < i)
1049 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001050 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
Guido van Rossum9284a572000-03-07 15:53:43 +00001053static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001054string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001055{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001056 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001057#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001058 if (PyUnicode_Check(sub_obj))
1059 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001061 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001062 PyErr_SetString(PyExc_TypeError,
1063 "'in <string>' requires string as left operand");
1064 return -1;
1065 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001066 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001067
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001069}
1070
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001072string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001074 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001075 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001076 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001077 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 return NULL;
1079 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001080 pchar = a->ob_sval[i];
1081 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001082 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001083 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001084 else {
1085#ifdef COUNT_ALLOCS
1086 one_strings++;
1087#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001088 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001089 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001090 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091}
1092
Martin v. Löwiscd353062001-05-24 16:56:35 +00001093static PyObject*
1094string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001095{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001096 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001097 Py_ssize_t len_a, len_b;
1098 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001099 PyObject *result;
1100
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001101 /* Make sure both arguments are strings. */
1102 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001103 result = Py_NotImplemented;
1104 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001105 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106 if (a == b) {
1107 switch (op) {
1108 case Py_EQ:case Py_LE:case Py_GE:
1109 result = Py_True;
1110 goto out;
1111 case Py_NE:case Py_LT:case Py_GT:
1112 result = Py_False;
1113 goto out;
1114 }
1115 }
1116 if (op == Py_EQ) {
1117 /* Supporting Py_NE here as well does not save
1118 much time, since Py_NE is rarely used. */
1119 if (a->ob_size == b->ob_size
1120 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001121 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001122 a->ob_size) == 0)) {
1123 result = Py_True;
1124 } else {
1125 result = Py_False;
1126 }
1127 goto out;
1128 }
1129 len_a = a->ob_size; len_b = b->ob_size;
1130 min_len = (len_a < len_b) ? len_a : len_b;
1131 if (min_len > 0) {
1132 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133 if (c==0)
1134 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135 }else
1136 c = 0;
1137 if (c == 0)
1138 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139 switch (op) {
1140 case Py_LT: c = c < 0; break;
1141 case Py_LE: c = c <= 0; break;
1142 case Py_EQ: assert(0); break; /* unreachable */
1143 case Py_NE: c = c != 0; break;
1144 case Py_GT: c = c > 0; break;
1145 case Py_GE: c = c >= 0; break;
1146 default:
1147 result = Py_NotImplemented;
1148 goto out;
1149 }
1150 result = c ? Py_True : Py_False;
1151 out:
1152 Py_INCREF(result);
1153 return result;
1154}
1155
1156int
1157_PyString_Eq(PyObject *o1, PyObject *o2)
1158{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001159 PyStringObject *a = (PyStringObject*) o1;
1160 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001161 return a->ob_size == b->ob_size
1162 && *a->ob_sval == *b->ob_sval
1163 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001164}
1165
Guido van Rossum9bfef441993-03-29 10:43:31 +00001166static long
Fred Drakeba096332000-07-09 07:04:36 +00001167string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001168{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001169 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001170 register unsigned char *p;
1171 register long x;
1172
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 if (a->ob_shash != -1)
1174 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001175 len = a->ob_size;
1176 p = (unsigned char *) a->ob_sval;
1177 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001179 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180 x ^= a->ob_size;
1181 if (x == -1)
1182 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001183 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001184 return x;
1185}
1186
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001187static PyObject*
1188string_subscript(PyStringObject* self, PyObject* item)
1189{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001190 if (PyIndex_Check(item)) {
1191 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001192 if (i == -1 && PyErr_Occurred())
1193 return NULL;
1194 if (i < 0)
1195 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001196 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001197 }
1198 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001199 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001200 char* source_buf;
1201 char* result_buf;
1202 PyObject* result;
1203
Tim Petersae1d0c92006-03-17 03:29:34 +00001204 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001205 PyString_GET_SIZE(self),
1206 &start, &stop, &step, &slicelength) < 0) {
1207 return NULL;
1208 }
1209
1210 if (slicelength <= 0) {
1211 return PyString_FromStringAndSize("", 0);
1212 }
1213 else {
1214 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001215 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001216 if (result_buf == NULL)
1217 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001218
Tim Petersae1d0c92006-03-17 03:29:34 +00001219 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 cur += step, i++) {
1221 result_buf[i] = source_buf[cur];
1222 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001223
1224 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001225 slicelength);
1226 PyMem_Free(result_buf);
1227 return result;
1228 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001229 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001231 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232 "string indices must be integers");
1233 return NULL;
1234 }
1235}
1236
Martin v. Löwis18e16552006-02-15 17:27:45 +00001237static Py_ssize_t
1238string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001239{
1240 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001241 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001242 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001243 return -1;
1244 }
1245 *ptr = (void *)self->ob_sval;
1246 return self->ob_size;
1247}
1248
Martin v. Löwis18e16552006-02-15 17:27:45 +00001249static Py_ssize_t
1250string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251{
Guido van Rossum045e6881997-09-08 18:30:11 +00001252 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001253 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254 return -1;
1255}
1256
Martin v. Löwis18e16552006-02-15 17:27:45 +00001257static Py_ssize_t
1258string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001259{
1260 if ( lenp )
1261 *lenp = self->ob_size;
1262 return 1;
1263}
1264
Martin v. Löwis18e16552006-02-15 17:27:45 +00001265static Py_ssize_t
1266string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001267{
1268 if ( index != 0 ) {
1269 PyErr_SetString(PyExc_SystemError,
1270 "accessing non-existent string segment");
1271 return -1;
1272 }
1273 *ptr = self->ob_sval;
1274 return self->ob_size;
1275}
1276
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001277static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001278 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001279 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280 (ssizeargfunc)string_repeat, /*sq_repeat*/
1281 (ssizeargfunc)string_item, /*sq_item*/
1282 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001283 0, /*sq_ass_item*/
1284 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001285 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001286};
1287
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001288static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001289 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001290 (binaryfunc)string_subscript,
1291 0,
1292};
1293
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001294static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001295 (readbufferproc)string_buffer_getreadbuf,
1296 (writebufferproc)string_buffer_getwritebuf,
1297 (segcountproc)string_buffer_getsegcount,
1298 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001299};
1300
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001301
1302
/* Strip modes; the values double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for strip mode i: the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001311
Andrew Dalke525eab32006-05-26 14:00:45 +00001312
/* Non-zero when the `length` bytes of `target` starting at `offset`
   equal the first `length` bytes of `pattern`.  The first and last
   bytes are compared before memcmp() is run on the bytes in between.

   Don't call if length < 2: the memcmp() length is `length - 2`.

   Every argument is parenthesized in the expansion so that expression
   arguments (e.g. `buf + 1`) keep their meaning; arguments are still
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1318
1319
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that an expression argument is not
   re-associated with the comparison or the `+ 1`. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1332
/* Helper macros for the split functions.  They are not self-contained:
   the expansions refer to names that must exist in the calling
   function -- `str` (PyObject * scratch variable), `list` (the result
   list), `count` (number of items stored so far) and the label
   `onError`. */

/* Append data[left:right] to `list` as a new string.  Expands to bare
   statements (no enclosing braces), the last of which already ends in
   a semicolon. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as item number `count` of `list` and bump
   `count`.  The first MAX_PREALLOC items are written straight into
   list slots with PyList_SET_ITEM(); later ones are appended. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count

/* Index scanners: advance (or, for the R variants, move back) `i`
   while s[i] is / is not whitespace, stopping at `len` going forward
   or below 0 going backward. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1369
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001370Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001371split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372{
Andrew Dalke525eab32006-05-26 14:00:45 +00001373 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001374 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001375 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376
1377 if (list == NULL)
1378 return NULL;
1379
Andrew Dalke02758d62006-05-26 15:21:01 +00001380 i = j = 0;
1381
1382 while (maxsplit-- > 0) {
1383 SKIP_SPACE(s, i, len);
1384 if (i==len) break;
1385 j = i; i++;
1386 SKIP_NONSPACE(s, i, len);
1387 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001389
1390 if (i < len) {
1391 /* Only occurs when maxsplit was reached */
1392 /* Skip any remaining whitespace and copy to end of string */
1393 SKIP_SPACE(s, i, len);
1394 if (i != len)
1395 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001396 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001397 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001399 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 Py_DECREF(list);
1401 return NULL;
1402}
1403
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001404Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001405split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001406{
Andrew Dalke525eab32006-05-26 14:00:45 +00001407 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001408 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001410
1411 if (list == NULL)
1412 return NULL;
1413
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001414 i = j = 0;
1415 while ((j < len) && (maxcount-- > 0)) {
1416 for(; j<len; j++) {
1417 /* I found that using memchr makes no difference */
1418 if (s[j] == ch) {
1419 SPLIT_ADD(s, i, j);
1420 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001422 }
1423 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001424 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001425 if (i <= len) {
1426 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001427 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001428 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001429 return list;
1430
1431 onError:
1432 Py_DECREF(list);
1433 return NULL;
1434}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001436PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437"S.split([sep [,maxsplit]]) -> list of strings\n\
1438\n\
1439Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001440delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001441splits are done. If sep is not specified or is None, any\n\
1442whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443
1444static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001445string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001446{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001447 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001448 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001449 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001450 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001451#ifdef USE_FAST
1452 Py_ssize_t pos;
1453#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454
Martin v. Löwis9c830762006-04-13 08:37:17 +00001455 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001457 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001458 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001459 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001461 if (PyString_Check(subobj)) {
1462 sub = PyString_AS_STRING(subobj);
1463 n = PyString_GET_SIZE(subobj);
1464 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001465#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001466 else if (PyUnicode_Check(subobj))
1467 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001468#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001469 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1470 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001471
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472 if (n == 0) {
1473 PyErr_SetString(PyExc_ValueError, "empty separator");
1474 return NULL;
1475 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001476 else if (n == 1)
1477 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478
Andrew Dalke525eab32006-05-26 14:00:45 +00001479 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001480 if (list == NULL)
1481 return NULL;
1482
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001483#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001485 while (maxsplit-- > 0) {
1486 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1487 if (pos < 0)
1488 break;
1489 j = i+pos;
1490 SPLIT_ADD(s, i, j);
1491 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001493#else
1494 i = j = 0;
1495 while ((j+n <= len) && (maxsplit-- > 0)) {
1496 for (; j+n <= len; j++) {
1497 if (Py_STRING_MATCH(s, j, sub, n)) {
1498 SPLIT_ADD(s, i, j);
1499 i = j = j + n;
1500 break;
1501 }
1502 }
1503 }
1504#endif
1505 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001506 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 return list;
1508
Andrew Dalke525eab32006-05-26 14:00:45 +00001509 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510 Py_DECREF(list);
1511 return NULL;
1512}
1513
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001514PyDoc_STRVAR(partition__doc__,
1515"S.partition(sep) -> (head, sep, tail)\n\
1516\n\
1517Searches for the separator sep in S, and returns the part before it,\n\
1518the separator itself, and the part after it. If the separator is not\n\
1519found, returns S and two empty strings.");
1520
1521static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001522string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001523{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001524 const char *sep;
1525 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001526
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001527 if (PyString_Check(sep_obj)) {
1528 sep = PyString_AS_STRING(sep_obj);
1529 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001530 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001531#ifdef Py_USING_UNICODE
1532 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001533 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001534#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001535 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001536 return NULL;
1537
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001538 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001539 (PyObject*) self,
1540 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1541 sep_obj, sep, sep_len
1542 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001543}
1544
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001545PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001546"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001547\n\
1548Searches for the separator sep in S, starting at the end of S, and returns\n\
1549the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001550separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001551
1552static PyObject *
1553string_rpartition(PyStringObject *self, PyObject *sep_obj)
1554{
1555 const char *sep;
1556 Py_ssize_t sep_len;
1557
1558 if (PyString_Check(sep_obj)) {
1559 sep = PyString_AS_STRING(sep_obj);
1560 sep_len = PyString_GET_SIZE(sep_obj);
1561 }
1562#ifdef Py_USING_UNICODE
1563 else if (PyUnicode_Check(sep_obj))
1564 return PyUnicode_Partition((PyObject *) self, sep_obj);
1565#endif
1566 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1567 return NULL;
1568
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001569 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001570 (PyObject*) self,
1571 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1572 sep_obj, sep, sep_len
1573 );
1574}
1575
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001576Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001577rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001578{
Andrew Dalke525eab32006-05-26 14:00:45 +00001579 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001580 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001581 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001582
1583 if (list == NULL)
1584 return NULL;
1585
Andrew Dalke02758d62006-05-26 15:21:01 +00001586 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001587
Andrew Dalke02758d62006-05-26 15:21:01 +00001588 while (maxsplit-- > 0) {
1589 RSKIP_SPACE(s, i);
1590 if (i<0) break;
1591 j = i; i--;
1592 RSKIP_NONSPACE(s, i);
1593 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001594 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001595 if (i >= 0) {
1596 /* Only occurs when maxsplit was reached */
1597 /* Skip any remaining whitespace and copy to beginning of string */
1598 RSKIP_SPACE(s, i);
1599 if (i >= 0)
1600 SPLIT_ADD(s, 0, i + 1);
1601
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001602 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001603 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001604 if (PyList_Reverse(list) < 0)
1605 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001606 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001607 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001608 Py_DECREF(list);
1609 return NULL;
1610}
1611
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001612Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001613rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001614{
Andrew Dalke525eab32006-05-26 14:00:45 +00001615 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001616 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001617 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001618
1619 if (list == NULL)
1620 return NULL;
1621
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001622 i = j = len - 1;
1623 while ((i >= 0) && (maxcount-- > 0)) {
1624 for (; i >= 0; i--) {
1625 if (s[i] == ch) {
1626 SPLIT_ADD(s, i + 1, j + 1);
1627 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001628 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001629 }
1630 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 }
1632 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001633 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001634 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001635 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001636 if (PyList_Reverse(list) < 0)
1637 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001638 return list;
1639
1640 onError:
1641 Py_DECREF(list);
1642 return NULL;
1643}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001644
1645PyDoc_STRVAR(rsplit__doc__,
1646"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1647\n\
1648Return a list of the words in the string S, using sep as the\n\
1649delimiter string, starting at the end of the string and working\n\
1650to the front. If maxsplit is given, at most maxsplit splits are\n\
1651done. If sep is not specified or is None, any whitespace string\n\
1652is a separator.");
1653
1654static PyObject *
1655string_rsplit(PyStringObject *self, PyObject *args)
1656{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001657 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001658 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001660 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001661
Martin v. Löwis9c830762006-04-13 08:37:17 +00001662 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001663 return NULL;
1664 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001665 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666 if (subobj == Py_None)
1667 return rsplit_whitespace(s, len, maxsplit);
1668 if (PyString_Check(subobj)) {
1669 sub = PyString_AS_STRING(subobj);
1670 n = PyString_GET_SIZE(subobj);
1671 }
1672#ifdef Py_USING_UNICODE
1673 else if (PyUnicode_Check(subobj))
1674 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1675#endif
1676 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1677 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001678
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679 if (n == 0) {
1680 PyErr_SetString(PyExc_ValueError, "empty separator");
1681 return NULL;
1682 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001683 else if (n == 1)
1684 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001685
Andrew Dalke525eab32006-05-26 14:00:45 +00001686 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001687 if (list == NULL)
1688 return NULL;
1689
1690 j = len;
1691 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001692
1693 while ( (i >= 0) && (maxsplit-- > 0) ) {
1694 for (; i>=0; i--) {
1695 if (Py_STRING_MATCH(s, i, sub, n)) {
1696 SPLIT_ADD(s, i + n, j);
1697 j = i;
1698 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001699 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001700 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001701 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001703 SPLIT_ADD(s, 0, j);
1704 FIX_PREALLOC_SIZE(list);
1705 if (PyList_Reverse(list) < 0)
1706 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001707 return list;
1708
Andrew Dalke525eab32006-05-26 14:00:45 +00001709onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001710 Py_DECREF(list);
1711 return NULL;
1712}
1713
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001715PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716"S.join(sequence) -> string\n\
1717\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001718Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001719sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720
1721static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001722string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723{
1724 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001725 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001728 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001729 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001730 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001731 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732
Tim Peters19fe14e2001-01-19 03:03:47 +00001733 seq = PySequence_Fast(orig, "");
1734 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001735 return NULL;
1736 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001737
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001738 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001739 if (seqlen == 0) {
1740 Py_DECREF(seq);
1741 return PyString_FromString("");
1742 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001744 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001745 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1746 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001747 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001748 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001749 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001751
Raymond Hettinger674f2412004-08-23 23:23:54 +00001752 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001753 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001754 * Do a pre-pass to figure out the total amount of space we'll
1755 * need (sz), see whether any argument is absurd, and defer to
1756 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001757 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001758 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001759 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001760 item = PySequence_Fast_GET_ITEM(seq, i);
1761 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001762#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001763 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001764 /* Defer to Unicode join.
1765 * CAUTION: There's no gurantee that the
1766 * original sequence can be iterated over
1767 * again, so we must pass seq here.
1768 */
1769 PyObject *result;
1770 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001771 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001772 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001774#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001776 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001777 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001778 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001779 Py_DECREF(seq);
1780 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001781 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001782 sz += PyString_GET_SIZE(item);
1783 if (i != 0)
1784 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001785 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001786 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001787 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001788 Py_DECREF(seq);
1789 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 }
1792
1793 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001794 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001795 if (res == NULL) {
1796 Py_DECREF(seq);
1797 return NULL;
1798 }
1799
1800 /* Catenate everything. */
1801 p = PyString_AS_STRING(res);
1802 for (i = 0; i < seqlen; ++i) {
1803 size_t n;
1804 item = PySequence_Fast_GET_ITEM(seq, i);
1805 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001806 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001807 p += n;
1808 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001809 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001810 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001811 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001813
Jeremy Hylton49048292000-07-11 03:28:17 +00001814 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816}
1817
Tim Peters52e155e2001-06-16 05:42:57 +00001818PyObject *
1819_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001820{
Tim Petersa7259592001-06-16 05:11:17 +00001821 assert(sep != NULL && PyString_Check(sep));
1822 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001823 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001824}
1825
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001826Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001827string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001828{
1829 if (*end > len)
1830 *end = len;
1831 else if (*end < 0)
1832 *end += len;
1833 if (*end < 0)
1834 *end = 0;
1835 if (*start < 0)
1836 *start += len;
1837 if (*start < 0)
1838 *start = 0;
1839}
1840
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001841Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001842string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001844 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001845 const char *sub;
1846 Py_ssize_t sub_len;
1847 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001849 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1850 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001851 return -2;
1852 if (PyString_Check(subobj)) {
1853 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001854 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001855 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001856#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001858 return PyUnicode_Find(
1859 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001860#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001861 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001862 /* XXX - the "expected a character buffer object" is pretty
1863 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 return -2;
1865
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001866 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001867 return stringlib_find_slice(
1868 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1869 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001870 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001871 return stringlib_rfind_slice(
1872 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1873 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001874}
1875
1876
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001877PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878"S.find(sub [,start [,end]]) -> int\n\
1879\n\
1880Return the lowest index in S where substring sub is found,\n\
1881such that sub is contained within s[start,end]. Optional\n\
1882arguments start and end are interpreted as in slice notation.\n\
1883\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001884Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001885
1886static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001887string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001889 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890 if (result == -2)
1891 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001892 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893}
1894
1895
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001896PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897"S.index(sub [,start [,end]]) -> int\n\
1898\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900
1901static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001902string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001904 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905 if (result == -2)
1906 return NULL;
1907 if (result == -1) {
1908 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001909 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910 return NULL;
1911 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001912 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913}
1914
1915
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001916PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917"S.rfind(sub [,start [,end]]) -> int\n\
1918\n\
1919Return the highest index in S where substring sub is found,\n\
1920such that sub is contained within s[start,end]. Optional\n\
1921arguments start and end are interpreted as in slice notation.\n\
1922\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001923Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924
1925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001926string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001928 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 if (result == -2)
1930 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001931 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932}
1933
1934
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001935PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936"S.rindex(sub [,start [,end]]) -> int\n\
1937\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001938Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939
1940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001941string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001943 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 if (result == -2)
1945 return NULL;
1946 if (result == -1) {
1947 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001948 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949 return NULL;
1950 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001951 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952}
1953
1954
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001955Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001956do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1957{
1958 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001959 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001960 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001961 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1962 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001963
1964 i = 0;
1965 if (striptype != RIGHTSTRIP) {
1966 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1967 i++;
1968 }
1969 }
1970
1971 j = len;
1972 if (striptype != LEFTSTRIP) {
1973 do {
1974 j--;
1975 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1976 j++;
1977 }
1978
1979 if (i == 0 && j == len && PyString_CheckExact(self)) {
1980 Py_INCREF(self);
1981 return (PyObject*)self;
1982 }
1983 else
1984 return PyString_FromStringAndSize(s+i, j-i);
1985}
1986
1987
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001988Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001989do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990{
1991 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001992 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994 i = 0;
1995 if (striptype != RIGHTSTRIP) {
1996 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1997 i++;
1998 }
1999 }
2000
2001 j = len;
2002 if (striptype != LEFTSTRIP) {
2003 do {
2004 j--;
2005 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2006 j++;
2007 }
2008
Tim Peters8fa5dd02001-09-12 02:18:30 +00002009 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010 Py_INCREF(self);
2011 return (PyObject*)self;
2012 }
2013 else
2014 return PyString_FromStringAndSize(s+i, j-i);
2015}
2016
2017
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002018Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002019do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2020{
2021 PyObject *sep = NULL;
2022
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002023 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002024 return NULL;
2025
2026 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002027 if (PyString_Check(sep))
2028 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002029#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002030 else if (PyUnicode_Check(sep)) {
2031 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2032 PyObject *res;
2033 if (uniself==NULL)
2034 return NULL;
2035 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2036 striptype, sep);
2037 Py_DECREF(uniself);
2038 return res;
2039 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002040#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002041 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002042#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002043 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002044#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002045 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002046#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002047 STRIPNAME(striptype));
2048 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002049 }
2050
2051 return do_strip(self, striptype);
2052}
2053
2054
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002055PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002056"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057\n\
2058Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002059whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002060If chars is given and not None, remove characters in chars instead.\n\
2061If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062
2063static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002064string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002066 if (PyTuple_GET_SIZE(args) == 0)
2067 return do_strip(self, BOTHSTRIP); /* Common case */
2068 else
2069 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070}
2071
2072
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002073PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002074"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002076Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002077If chars is given and not None, remove characters in chars instead.\n\
2078If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079
2080static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002083 if (PyTuple_GET_SIZE(args) == 0)
2084 return do_strip(self, LEFTSTRIP); /* Common case */
2085 else
2086 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087}
2088
2089
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002090PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002091"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002093Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002094If chars is given and not None, remove characters in chars instead.\n\
2095If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
2097static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002100 if (PyTuple_GET_SIZE(args) == 0)
2101 return do_strip(self, RIGHTSTRIP); /* Common case */
2102 else
2103 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104}
2105
2106
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002107PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108"S.lower() -> string\n\
2109\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002110Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002112/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2113#ifndef _tolower
2114#define _tolower tolower
2115#endif
2116
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002118string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002120 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002121 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002122 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002124 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002125 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002127
2128 s = PyString_AS_STRING(newobj);
2129
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002130 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002131
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002133 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002134 if (isupper(c))
2135 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002137
Anthony Baxtera6286212006-04-11 07:42:36 +00002138 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139}
2140
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002141PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142"S.upper() -> string\n\
2143\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002144Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002146#ifndef _toupper
2147#define _toupper toupper
2148#endif
2149
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002151string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002153 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002154 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002155 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002157 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002158 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002160
2161 s = PyString_AS_STRING(newobj);
2162
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002163 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002164
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002166 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002167 if (islower(c))
2168 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002170
Anthony Baxtera6286212006-04-11 07:42:36 +00002171 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172}
2173
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002174PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175"S.title() -> string\n\
2176\n\
2177Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002178characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179
2180static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002181string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002182{
2183 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002184 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002186 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187
Anthony Baxtera6286212006-04-11 07:42:36 +00002188 newobj = PyString_FromStringAndSize(NULL, n);
2189 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002191 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002192 for (i = 0; i < n; i++) {
2193 int c = Py_CHARMASK(*s++);
2194 if (islower(c)) {
2195 if (!previous_is_cased)
2196 c = toupper(c);
2197 previous_is_cased = 1;
2198 } else if (isupper(c)) {
2199 if (previous_is_cased)
2200 c = tolower(c);
2201 previous_is_cased = 1;
2202 } else
2203 previous_is_cased = 0;
2204 *s_new++ = c;
2205 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002206 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207}
2208
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002209PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210"S.capitalize() -> string\n\
2211\n\
2212Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214
2215static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002216string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217{
2218 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002219 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002220 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221
Anthony Baxtera6286212006-04-11 07:42:36 +00002222 newobj = PyString_FromStringAndSize(NULL, n);
2223 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002225 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226 if (0 < n) {
2227 int c = Py_CHARMASK(*s++);
2228 if (islower(c))
2229 *s_new = toupper(c);
2230 else
2231 *s_new = c;
2232 s_new++;
2233 }
2234 for (i = 1; i < n; i++) {
2235 int c = Py_CHARMASK(*s++);
2236 if (isupper(c))
2237 *s_new = tolower(c);
2238 else
2239 *s_new = c;
2240 s_new++;
2241 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002242 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243}
2244
2245
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002246PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247"S.count(sub[, start[, end]]) -> int\n\
2248\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002249Return the number of non-overlapping occurrences of substring sub in\n\
2250string S[start:end]. Optional arguments start and end are interpreted\n\
2251as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252
2253static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002254string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002256 PyObject *sub_obj;
2257 const char *str = PyString_AS_STRING(self), *sub;
2258 Py_ssize_t sub_len;
2259 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002261 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2262 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002264
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002265 if (PyString_Check(sub_obj)) {
2266 sub = PyString_AS_STRING(sub_obj);
2267 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002269#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002270 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002271 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002272 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002273 if (count == -1)
2274 return NULL;
2275 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002276 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002277 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002278#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002279 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002280 return NULL;
2281
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002282 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002283
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002284 return PyInt_FromSsize_t(
2285 stringlib_count(str + start, end - start, sub, sub_len)
2286 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287}
2288
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002289PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290"S.swapcase() -> string\n\
2291\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002292Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002293converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294
2295static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002296string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297{
2298 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002299 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002300 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301
Anthony Baxtera6286212006-04-11 07:42:36 +00002302 newobj = PyString_FromStringAndSize(NULL, n);
2303 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002305 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306 for (i = 0; i < n; i++) {
2307 int c = Py_CHARMASK(*s++);
2308 if (islower(c)) {
2309 *s_new = toupper(c);
2310 }
2311 else if (isupper(c)) {
2312 *s_new = tolower(c);
2313 }
2314 else
2315 *s_new = c;
2316 s_new++;
2317 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002318 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319}
2320
2321
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002322PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323"S.translate(table [,deletechars]) -> string\n\
2324\n\
2325Return a copy of the string S, where all characters occurring\n\
2326in the optional argument deletechars are removed, and the\n\
2327remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002328translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329
2330static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002331string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 register char *input, *output;
2334 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002335 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002338 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 PyObject *result;
2340 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002343 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002344 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346
2347 if (PyString_Check(tableobj)) {
2348 table1 = PyString_AS_STRING(tableobj);
2349 tablen = PyString_GET_SIZE(tableobj);
2350 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002351#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002353 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002354 parameter; instead a mapping to None will cause characters
2355 to be deleted. */
2356 if (delobj != NULL) {
2357 PyErr_SetString(PyExc_TypeError,
2358 "deletions are implemented differently for unicode");
2359 return NULL;
2360 }
2361 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2362 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002363#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002364 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002366
Martin v. Löwis00b61272002-12-12 20:03:19 +00002367 if (tablen != 256) {
2368 PyErr_SetString(PyExc_ValueError,
2369 "translation table must be 256 characters long");
2370 return NULL;
2371 }
2372
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 if (delobj != NULL) {
2374 if (PyString_Check(delobj)) {
2375 del_table = PyString_AS_STRING(delobj);
2376 dellen = PyString_GET_SIZE(delobj);
2377 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002378#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 else if (PyUnicode_Check(delobj)) {
2380 PyErr_SetString(PyExc_TypeError,
2381 "deletions are implemented differently for unicode");
2382 return NULL;
2383 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002384#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2386 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387 }
2388 else {
2389 del_table = NULL;
2390 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391 }
2392
2393 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002394 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002395 result = PyString_FromStringAndSize((char *)NULL, inlen);
2396 if (result == NULL)
2397 return NULL;
2398 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002399 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400
2401 if (dellen == 0) {
2402 /* If no deletions are required, use faster code */
2403 for (i = inlen; --i >= 0; ) {
2404 c = Py_CHARMASK(*input++);
2405 if (Py_CHARMASK((*output++ = table[c])) != c)
2406 changed = 1;
2407 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002408 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409 return result;
2410 Py_DECREF(result);
2411 Py_INCREF(input_obj);
2412 return input_obj;
2413 }
2414
2415 for (i = 0; i < 256; i++)
2416 trans_table[i] = Py_CHARMASK(table[i]);
2417
2418 for (i = 0; i < dellen; i++)
2419 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2420
2421 for (i = inlen; --i >= 0; ) {
2422 c = Py_CHARMASK(*input++);
2423 if (trans_table[c] != -1)
2424 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2425 continue;
2426 changed = 1;
2427 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002428 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 Py_DECREF(result);
2430 Py_INCREF(input_obj);
2431 return input_obj;
2432 }
2433 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002434 if (inlen > 0)
2435 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436 return result;
2437}
2438
2439
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to `c` within the first `target_len` bytes
   of `target`; evaluates to a char* to the match, or NULL if absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2447
2448/* String ops must return a string. */
2449/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002450Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002451return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002452{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002453 if (PyString_CheckExact(self)) {
2454 Py_INCREF(self);
2455 return self;
2456 }
2457 return (PyStringObject *)PyString_FromStringAndSize(
2458 PyString_AS_STRING(self),
2459 PyString_GET_SIZE(self));
2460}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002461
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002462Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002463countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002464{
2465 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002466 const char *start=target;
2467 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002468
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002469 while ( (start=findchar(start, end-start, c)) != NULL ) {
2470 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002471 if (count >= maxcount)
2472 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002473 start += 1;
2474 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002475 return count;
2476}
2477
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002478Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002479findstring(const char *target, Py_ssize_t target_len,
2480 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002481 Py_ssize_t start,
2482 Py_ssize_t end,
2483 int direction)
2484{
2485 if (start < 0) {
2486 start += target_len;
2487 if (start < 0)
2488 start = 0;
2489 }
2490 if (end > target_len) {
2491 end = target_len;
2492 } else if (end < 0) {
2493 end += target_len;
2494 if (end < 0)
2495 end = 0;
2496 }
2497
2498 /* zero-length substrings always match at the first attempt */
2499 if (pattern_len == 0)
2500 return (direction > 0) ? start : end;
2501
2502 end -= pattern_len;
2503
2504 if (direction < 0) {
2505 for (; end >= start; end--)
2506 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2507 return end;
2508 } else {
2509 for (; start <= end; start++)
2510 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2511 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512 }
2513 return -1;
2514}
2515
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002516Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002517countstring(const char *target, Py_ssize_t target_len,
2518 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002519 Py_ssize_t start,
2520 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002521 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002522{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002523 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002524
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002525 if (start < 0) {
2526 start += target_len;
2527 if (start < 0)
2528 start = 0;
2529 }
2530 if (end > target_len) {
2531 end = target_len;
2532 } else if (end < 0) {
2533 end += target_len;
2534 if (end < 0)
2535 end = 0;
2536 }
2537
2538 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002539 if (pattern_len == 0 || maxcount == 0) {
2540 if (target_len+1 < maxcount)
2541 return target_len+1;
2542 return maxcount;
2543 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002544
2545 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002546 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002547 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002548 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2549 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002550 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002551 end -= pattern_len-1;
2552 }
2553 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002554 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002555 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2556 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002557 if (--maxcount <= 0)
2558 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559 start += pattern_len-1;
2560 }
2561 }
2562 return count;
2563}
2564
2565
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002566/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002567
2568/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002569Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002570replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002571 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002572 Py_ssize_t maxcount)
2573{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002574 char *self_s, *result_s;
2575 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002576 Py_ssize_t count, i, product;
2577 PyStringObject *result;
2578
2579 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002580
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002581 /* 1 at the end plus 1 after every character */
2582 count = self_len+1;
2583 if (maxcount < count)
2584 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002585
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002586 /* Check for overflow */
2587 /* result_len = count * to_len + self_len; */
2588 product = count * to_len;
2589 if (product / to_len != count) {
2590 PyErr_SetString(PyExc_OverflowError,
2591 "replace string is too long");
2592 return NULL;
2593 }
2594 result_len = product + self_len;
2595 if (result_len < 0) {
2596 PyErr_SetString(PyExc_OverflowError,
2597 "replace string is too long");
2598 return NULL;
2599 }
2600
2601 if (! (result = (PyStringObject *)
2602 PyString_FromStringAndSize(NULL, result_len)) )
2603 return NULL;
2604
2605 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002606 result_s = PyString_AS_STRING(result);
2607
2608 /* TODO: special case single character, which doesn't need memcpy */
2609
2610 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002611 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002612 result_s += to_len;
2613 count -= 1;
2614
2615 for (i=0; i<count; i++) {
2616 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002617 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002618 result_s += to_len;
2619 }
2620
2621 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002622 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002623
2624 return result;
2625}
2626
2627/* Special case for deleting a single character */
2628/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002629Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002630replace_delete_single_character(PyStringObject *self,
2631 char from_c, Py_ssize_t maxcount)
2632{
2633 char *self_s, *result_s;
2634 char *start, *next, *end;
2635 Py_ssize_t self_len, result_len;
2636 Py_ssize_t count;
2637 PyStringObject *result;
2638
2639 self_len = PyString_GET_SIZE(self);
2640 self_s = PyString_AS_STRING(self);
2641
Andrew Dalke51324072006-05-26 20:25:22 +00002642 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002643 if (count == 0) {
2644 return return_self(self);
2645 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002646
2647 result_len = self_len - count; /* from_len == 1 */
2648 assert(result_len>=0);
2649
2650 if ( (result = (PyStringObject *)
2651 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2652 return NULL;
2653 result_s = PyString_AS_STRING(result);
2654
2655 start = self_s;
2656 end = self_s + self_len;
2657 while (count-- > 0) {
2658 next = findchar(start, end-start, from_c);
2659 if (next == NULL)
2660 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002661 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002662 result_s += (next-start);
2663 start = next+1;
2664 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002665 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002666
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002667 return result;
2668}
2669
2670/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2671
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002672Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002673replace_delete_substring(PyStringObject *self,
2674 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002675 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002676 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002677 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002678 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002679 Py_ssize_t count, offset;
2680 PyStringObject *result;
2681
2682 self_len = PyString_GET_SIZE(self);
2683 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002684
2685 count = countstring(self_s, self_len,
2686 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002687 0, self_len, 1,
2688 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002689
2690 if (count == 0) {
2691 /* no matches */
2692 return return_self(self);
2693 }
2694
2695 result_len = self_len - (count * from_len);
2696 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002697
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002698 if ( (result = (PyStringObject *)
2699 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2700 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002701
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002702 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002703
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002704 start = self_s;
2705 end = self_s + self_len;
2706 while (count-- > 0) {
2707 offset = findstring(start, end-start,
2708 from_s, from_len,
2709 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002710 if (offset == -1)
2711 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002712 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002713
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002714 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002715
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002716 result_s += (next-start);
2717 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002718 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002719 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002720 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002721}
2722
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002723/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002724Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002725replace_single_character_in_place(PyStringObject *self,
2726 char from_c, char to_c,
2727 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002728{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002729 char *self_s, *result_s, *start, *end, *next;
2730 Py_ssize_t self_len;
2731 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002732
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002733 /* The result string will be the same size */
2734 self_s = PyString_AS_STRING(self);
2735 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002736
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002737 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002738
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002739 if (next == NULL) {
2740 /* No matches; return the original string */
2741 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002742 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002743
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002745 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002746 if (result == NULL)
2747 return NULL;
2748 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002749 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002750
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002751 /* change everything in-place, starting with this one */
2752 start = result_s + (next-self_s);
2753 *start = to_c;
2754 start++;
2755 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002756
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757 while (--maxcount > 0) {
2758 next = findchar(start, end-start, from_c);
2759 if (next == NULL)
2760 break;
2761 *next = to_c;
2762 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002763 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002764
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002765 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002766}
2767
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002768/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002769Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002770replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002771 const char *from_s, Py_ssize_t from_len,
2772 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002773 Py_ssize_t maxcount)
2774{
2775 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002776 char *self_s;
2777 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002779
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002780 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002781
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002782 self_s = PyString_AS_STRING(self);
2783 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002784
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785 offset = findstring(self_s, self_len,
2786 from_s, from_len,
2787 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 if (offset == -1) {
2789 /* No matches; return the original string */
2790 return return_self(self);
2791 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002792
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002794 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002795 if (result == NULL)
2796 return NULL;
2797 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002798 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002799
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 /* change everything in-place, starting with this one */
2801 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002802 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002803 start += from_len;
2804 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002805
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002806 while ( --maxcount > 0) {
2807 offset = findstring(start, end-start,
2808 from_s, from_len,
2809 0, end-start, FORWARD);
2810 if (offset==-1)
2811 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002812 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002813 start += offset+from_len;
2814 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002815
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816 return result;
2817}
2818
2819/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002820Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821replace_single_character(PyStringObject *self,
2822 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002823 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824 Py_ssize_t maxcount)
2825{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002826 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002828 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002829 Py_ssize_t count, product;
2830 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002831
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 self_s = PyString_AS_STRING(self);
2833 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002834
Andrew Dalke51324072006-05-26 20:25:22 +00002835 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 if (count == 0) {
2837 /* no matches, return unchanged */
2838 return return_self(self);
2839 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002840
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002841 /* use the difference between current and new, hence the "-1" */
2842 /* result_len = self_len + count * (to_len-1) */
2843 product = count * (to_len-1);
2844 if (product / (to_len-1) != count) {
2845 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2846 return NULL;
2847 }
2848 result_len = self_len + product;
2849 if (result_len < 0) {
2850 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2851 return NULL;
2852 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002853
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002854 if ( (result = (PyStringObject *)
2855 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2856 return NULL;
2857 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002858
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002859 start = self_s;
2860 end = self_s + self_len;
2861 while (count-- > 0) {
2862 next = findchar(start, end-start, from_c);
2863 if (next == NULL)
2864 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002865
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002866 if (next == start) {
2867 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002868 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869 result_s += to_len;
2870 start += 1;
2871 } else {
2872 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002873 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002875 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002876 result_s += to_len;
2877 start = next+1;
2878 }
2879 }
2880 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002881 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002882
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002883 return result;
2884}
2885
2886/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002887Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002888replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002889 const char *from_s, Py_ssize_t from_len,
2890 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002891 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002892 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002893 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002894 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895 Py_ssize_t count, offset, product;
2896 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002897
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 self_s = PyString_AS_STRING(self);
2899 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002900
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901 count = countstring(self_s, self_len,
2902 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002903 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002904 if (count == 0) {
2905 /* no matches, return unchanged */
2906 return return_self(self);
2907 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002908
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002909 /* Check for overflow */
2910 /* result_len = self_len + count * (to_len-from_len) */
2911 product = count * (to_len-from_len);
2912 if (product / (to_len-from_len) != count) {
2913 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2914 return NULL;
2915 }
2916 result_len = self_len + product;
2917 if (result_len < 0) {
2918 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2919 return NULL;
2920 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002921
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002922 if ( (result = (PyStringObject *)
2923 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2924 return NULL;
2925 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002926
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002927 start = self_s;
2928 end = self_s + self_len;
2929 while (count-- > 0) {
2930 offset = findstring(start, end-start,
2931 from_s, from_len,
2932 0, end-start, FORWARD);
2933 if (offset == -1)
2934 break;
2935 next = start+offset;
2936 if (next == start) {
2937 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002938 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002939 result_s += to_len;
2940 start += from_len;
2941 } else {
2942 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002943 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002944 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002945 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002946 result_s += to_len;
2947 start = next+from_len;
2948 }
2949 }
2950 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002951 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002952
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002953 return result;
2954}
2955
2956
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002957Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002958replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002959 const char *from_s, Py_ssize_t from_len,
2960 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002961 Py_ssize_t maxcount)
2962{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002963 if (maxcount < 0) {
2964 maxcount = PY_SSIZE_T_MAX;
2965 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2966 /* nothing to do; return the original string */
2967 return return_self(self);
2968 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002969
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002970 if (maxcount == 0 ||
2971 (from_len == 0 && to_len == 0)) {
2972 /* nothing to do; return the original string */
2973 return return_self(self);
2974 }
2975
2976 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002977
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978 if (from_len == 0) {
2979 /* insert the 'to' string everywhere. */
2980 /* >>> "Python".replace("", ".") */
2981 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002982 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002983 }
2984
2985 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2986 /* point for an empty self string to generate a non-empty string */
2987 /* Special case so the remaining code always gets a non-empty string */
2988 if (PyString_GET_SIZE(self) == 0) {
2989 return return_self(self);
2990 }
2991
2992 if (to_len == 0) {
2993 /* delete all occurances of 'from' string */
2994 if (from_len == 1) {
2995 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002996 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002997 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002998 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002999 }
3000 }
3001
3002 /* Handle special case where both strings have the same length */
3003
3004 if (from_len == to_len) {
3005 if (from_len == 1) {
3006 return replace_single_character_in_place(
3007 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003008 from_s[0],
3009 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003010 maxcount);
3011 } else {
3012 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003013 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003014 }
3015 }
3016
3017 /* Otherwise use the more generic algorithms */
3018 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003019 return replace_single_character(self, from_s[0],
3020 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003021 } else {
3022 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003023 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003024 }
3025}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003026
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003027PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003028"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003029\n\
3030Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003031old replaced by new. If the optional argument count is\n\
3032given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003033
3034static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003035string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003036{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003037 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003038 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003039 const char *from_s, *to_s;
3040 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003041
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003042 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003043 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003044
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003045 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003046 from_s = PyString_AS_STRING(from);
3047 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003048 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003049#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003050 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003051 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003052 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003053#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003054 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003055 return NULL;
3056
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003058 to_s = PyString_AS_STRING(to);
3059 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003060 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003061#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003062 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003063 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003064 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003065#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003066 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 return NULL;
3068
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003069 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003070 from_s, from_len,
3071 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003072}
3073
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003074/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003075
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003076/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003077 * against substr, using the start and end arguments. Returns
3078 * -1 on error, 0 if not found and 1 if found.
3079 */
3080Py_LOCAL(int)
3081_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3082 Py_ssize_t end, int direction)
3083{
3084 Py_ssize_t len = PyString_GET_SIZE(self);
3085 Py_ssize_t slen;
3086 const char* sub;
3087 const char* str;
3088
3089 if (PyString_Check(substr)) {
3090 sub = PyString_AS_STRING(substr);
3091 slen = PyString_GET_SIZE(substr);
3092 }
3093#ifdef Py_USING_UNICODE
3094 else if (PyUnicode_Check(substr))
3095 return PyUnicode_Tailmatch((PyObject *)self,
3096 substr, start, end, direction);
3097#endif
3098 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3099 return -1;
3100 str = PyString_AS_STRING(self);
3101
3102 string_adjust_indices(&start, &end, len);
3103
3104 if (direction < 0) {
3105 /* startswith */
3106 if (start+slen > len)
3107 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003108 } else {
3109 /* endswith */
3110 if (end-start < slen || start > len)
3111 return 0;
3112
3113 if (end-slen > start)
3114 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003115 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003116 if (end-start >= slen)
3117 return ! memcmp(str+start, sub, slen);
3118 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003119}
3120
3121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003122PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003123"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003124\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003125Return True if S starts with the specified prefix, False otherwise.\n\
3126With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003127With optional end, stop comparing S at that position.\n\
3128prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003129
3130static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003131string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003132{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003133 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003134 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003135 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003136 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137
Guido van Rossumc6821402000-05-08 14:08:05 +00003138 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3139 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003141 if (PyTuple_Check(subobj)) {
3142 Py_ssize_t i;
3143 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3144 result = _string_tailmatch(self,
3145 PyTuple_GET_ITEM(subobj, i),
3146 start, end, -1);
3147 if (result == -1)
3148 return NULL;
3149 else if (result) {
3150 Py_RETURN_TRUE;
3151 }
3152 }
3153 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 }
Georg Brandl24250812006-06-09 18:45:48 +00003155 result = _string_tailmatch(self, subobj, start, end, -1);
3156 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003157 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003158 else
Georg Brandl24250812006-06-09 18:45:48 +00003159 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003160}
3161
3162
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003163PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003164"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003165\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003166Return True if S ends with the specified suffix, False otherwise.\n\
3167With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003168With optional end, stop comparing S at that position.\n\
3169suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003170
3171static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003172string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003173{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003174 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003175 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003177 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003178
Guido van Rossumc6821402000-05-08 14:08:05 +00003179 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3180 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003181 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003182 if (PyTuple_Check(subobj)) {
3183 Py_ssize_t i;
3184 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3185 result = _string_tailmatch(self,
3186 PyTuple_GET_ITEM(subobj, i),
3187 start, end, +1);
3188 if (result == -1)
3189 return NULL;
3190 else if (result) {
3191 Py_RETURN_TRUE;
3192 }
3193 }
3194 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195 }
Georg Brandl24250812006-06-09 18:45:48 +00003196 result = _string_tailmatch(self, subobj, start, end, +1);
3197 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003198 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003199 else
Georg Brandl24250812006-06-09 18:45:48 +00003200 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003201}
3202
3203
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003204PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003205"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003206\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003207Encodes S using the codec registered for encoding. encoding defaults\n\
3208to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003209handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003210a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3211'xmlcharrefreplace' as well as any other name registered with\n\
3212codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003213
3214static PyObject *
3215string_encode(PyStringObject *self, PyObject *args)
3216{
3217 char *encoding = NULL;
3218 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003219 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003220
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003221 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3222 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003223 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003224 if (v == NULL)
3225 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003226 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3227 PyErr_Format(PyExc_TypeError,
3228 "encoder did not return a string/unicode object "
3229 "(type=%.400s)",
3230 v->ob_type->tp_name);
3231 Py_DECREF(v);
3232 return NULL;
3233 }
3234 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003235
3236 onError:
3237 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003238}
3239
3240
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003241PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003242"S.decode([encoding[,errors]]) -> object\n\
3243\n\
3244Decodes S using the codec registered for encoding. encoding defaults\n\
3245to the default encoding. errors may be given to set a different error\n\
3246handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003247a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3248as well as any other name registerd with codecs.register_error that is\n\
3249able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003250
3251static PyObject *
3252string_decode(PyStringObject *self, PyObject *args)
3253{
3254 char *encoding = NULL;
3255 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003256 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003257
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003258 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3259 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003260 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003261 if (v == NULL)
3262 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003263 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3264 PyErr_Format(PyExc_TypeError,
3265 "decoder did not return a string/unicode object "
3266 "(type=%.400s)",
3267 v->ob_type->tp_name);
3268 Py_DECREF(v);
3269 return NULL;
3270 }
3271 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003272
3273 onError:
3274 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003275}
3276
3277
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003278PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003279"S.expandtabs([tabsize]) -> string\n\
3280\n\
3281Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003282If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003283
3284static PyObject*
3285string_expandtabs(PyStringObject *self, PyObject *args)
3286{
3287 const char *e, *p;
3288 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003289 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003290 PyObject *u;
3291 int tabsize = 8;
3292
3293 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3294 return NULL;
3295
Thomas Wouters7e474022000-07-16 12:04:32 +00003296 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003297 i = j = 0;
3298 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3299 for (p = PyString_AS_STRING(self); p < e; p++)
3300 if (*p == '\t') {
3301 if (tabsize > 0)
3302 j += tabsize - (j % tabsize);
3303 }
3304 else {
3305 j++;
3306 if (*p == '\n' || *p == '\r') {
3307 i += j;
3308 j = 0;
3309 }
3310 }
3311
3312 /* Second pass: create output string and fill it */
3313 u = PyString_FromStringAndSize(NULL, i + j);
3314 if (!u)
3315 return NULL;
3316
3317 j = 0;
3318 q = PyString_AS_STRING(u);
3319
3320 for (p = PyString_AS_STRING(self); p < e; p++)
3321 if (*p == '\t') {
3322 if (tabsize > 0) {
3323 i = tabsize - (j % tabsize);
3324 j += i;
3325 while (i--)
3326 *q++ = ' ';
3327 }
3328 }
3329 else {
3330 j++;
3331 *q++ = *p;
3332 if (*p == '\n' || *p == '\r')
3333 j = 0;
3334 }
3335
3336 return u;
3337}
3338
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003339Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003340pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003341{
3342 PyObject *u;
3343
3344 if (left < 0)
3345 left = 0;
3346 if (right < 0)
3347 right = 0;
3348
Tim Peters8fa5dd02001-09-12 02:18:30 +00003349 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003350 Py_INCREF(self);
3351 return (PyObject *)self;
3352 }
3353
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003354 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003355 left + PyString_GET_SIZE(self) + right);
3356 if (u) {
3357 if (left)
3358 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003359 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003360 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003361 PyString_GET_SIZE(self));
3362 if (right)
3363 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3364 fill, right);
3365 }
3366
3367 return u;
3368}
3369
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003370PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003371"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003372"\n"
3373"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003374"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003375
3376static PyObject *
3377string_ljust(PyStringObject *self, PyObject *args)
3378{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003379 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003380 char fillchar = ' ';
3381
Thomas Wouters4abb3662006-04-19 14:50:15 +00003382 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003383 return NULL;
3384
Tim Peters8fa5dd02001-09-12 02:18:30 +00003385 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003386 Py_INCREF(self);
3387 return (PyObject*) self;
3388 }
3389
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003390 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003391}
3392
3393
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003394PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003395"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003396"\n"
3397"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003398"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399
3400static PyObject *
3401string_rjust(PyStringObject *self, PyObject *args)
3402{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003403 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003404 char fillchar = ' ';
3405
Thomas Wouters4abb3662006-04-19 14:50:15 +00003406 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003407 return NULL;
3408
Tim Peters8fa5dd02001-09-12 02:18:30 +00003409 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003410 Py_INCREF(self);
3411 return (PyObject*) self;
3412 }
3413
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003414 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003415}
3416
3417
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003418PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003419"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003420"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003421"Return S centered in a string of length width. Padding is\n"
3422"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003423
3424static PyObject *
3425string_center(PyStringObject *self, PyObject *args)
3426{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003427 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003428 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003429 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430
Thomas Wouters4abb3662006-04-19 14:50:15 +00003431 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003432 return NULL;
3433
Tim Peters8fa5dd02001-09-12 02:18:30 +00003434 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435 Py_INCREF(self);
3436 return (PyObject*) self;
3437 }
3438
3439 marg = width - PyString_GET_SIZE(self);
3440 left = marg / 2 + (marg & width & 1);
3441
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003442 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003443}
3444
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003445PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003446"S.zfill(width) -> string\n"
3447"\n"
3448"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003449"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003450
3451static PyObject *
3452string_zfill(PyStringObject *self, PyObject *args)
3453{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003454 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003455 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003456 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003457 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003458
Thomas Wouters4abb3662006-04-19 14:50:15 +00003459 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003460 return NULL;
3461
3462 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003463 if (PyString_CheckExact(self)) {
3464 Py_INCREF(self);
3465 return (PyObject*) self;
3466 }
3467 else
3468 return PyString_FromStringAndSize(
3469 PyString_AS_STRING(self),
3470 PyString_GET_SIZE(self)
3471 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003472 }
3473
3474 fill = width - PyString_GET_SIZE(self);
3475
3476 s = pad(self, fill, 0, '0');
3477
3478 if (s == NULL)
3479 return NULL;
3480
3481 p = PyString_AS_STRING(s);
3482 if (p[fill] == '+' || p[fill] == '-') {
3483 /* move sign to beginning of string */
3484 p[0] = p[fill];
3485 p[fill] = '0';
3486 }
3487
3488 return (PyObject*) s;
3489}
3490
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003491PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003492"S.isspace() -> bool\n\
3493\n\
3494Return True if all characters in S are whitespace\n\
3495and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003496
3497static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003498string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003499{
Fred Drakeba096332000-07-09 07:04:36 +00003500 register const unsigned char *p
3501 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003502 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003503
Guido van Rossum4c08d552000-03-10 22:55:18 +00003504 /* Shortcut for single character strings */
3505 if (PyString_GET_SIZE(self) == 1 &&
3506 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003507 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003508
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003509 /* Special case for empty strings */
3510 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003511 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003512
Guido van Rossum4c08d552000-03-10 22:55:18 +00003513 e = p + PyString_GET_SIZE(self);
3514 for (; p < e; p++) {
3515 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003516 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003517 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003518 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003519}
3520
3521
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003522PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003523"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003524\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003525Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003526and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003527
3528static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003529string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003530{
Fred Drakeba096332000-07-09 07:04:36 +00003531 register const unsigned char *p
3532 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003533 register const unsigned char *e;
3534
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003535 /* Shortcut for single character strings */
3536 if (PyString_GET_SIZE(self) == 1 &&
3537 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003538 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003539
3540 /* Special case for empty strings */
3541 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003542 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003543
3544 e = p + PyString_GET_SIZE(self);
3545 for (; p < e; p++) {
3546 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003547 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003548 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550}
3551
3552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003553PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003554"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003555\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003556Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003557and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558
3559static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003560string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561{
Fred Drakeba096332000-07-09 07:04:36 +00003562 register const unsigned char *p
3563 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003564 register const unsigned char *e;
3565
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003566 /* Shortcut for single character strings */
3567 if (PyString_GET_SIZE(self) == 1 &&
3568 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003569 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003570
3571 /* Special case for empty strings */
3572 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003573 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574
3575 e = p + PyString_GET_SIZE(self);
3576 for (; p < e; p++) {
3577 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003578 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581}
3582
3583
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003584PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003585"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003587Return True if all characters in S are digits\n\
3588and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589
3590static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003591string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003592{
Fred Drakeba096332000-07-09 07:04:36 +00003593 register const unsigned char *p
3594 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003595 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003596
Guido van Rossum4c08d552000-03-10 22:55:18 +00003597 /* Shortcut for single character strings */
3598 if (PyString_GET_SIZE(self) == 1 &&
3599 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003600 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003601
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003602 /* Special case for empty strings */
3603 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003604 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003605
Guido van Rossum4c08d552000-03-10 22:55:18 +00003606 e = p + PyString_GET_SIZE(self);
3607 for (; p < e; p++) {
3608 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003609 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003610 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612}
3613
3614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003615PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003616"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003617\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003619at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620
3621static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003622string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623{
Fred Drakeba096332000-07-09 07:04:36 +00003624 register const unsigned char *p
3625 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003626 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627 int cased;
3628
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629 /* Shortcut for single character strings */
3630 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003631 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003633 /* Special case for empty strings */
3634 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003635 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003636
Guido van Rossum4c08d552000-03-10 22:55:18 +00003637 e = p + PyString_GET_SIZE(self);
3638 cased = 0;
3639 for (; p < e; p++) {
3640 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642 else if (!cased && islower(*p))
3643 cased = 1;
3644 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003645 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646}
3647
3648
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003649PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003652Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003653at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654
3655static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003656string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657{
Fred Drakeba096332000-07-09 07:04:36 +00003658 register const unsigned char *p
3659 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003660 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661 int cased;
3662
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663 /* Shortcut for single character strings */
3664 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003667 /* Special case for empty strings */
3668 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003669 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003670
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671 e = p + PyString_GET_SIZE(self);
3672 cased = 0;
3673 for (; p < e; p++) {
3674 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676 else if (!cased && isupper(*p))
3677 cased = 1;
3678 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680}
3681
3682
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003683PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003684"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003685\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003686Return True if S is a titlecased string and there is at least one\n\
3687character in S, i.e. uppercase characters may only follow uncased\n\
3688characters and lowercase characters only cased ones. Return False\n\
3689otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003690
3691static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003692string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693{
Fred Drakeba096332000-07-09 07:04:36 +00003694 register const unsigned char *p
3695 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003696 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697 int cased, previous_is_cased;
3698
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 /* Shortcut for single character strings */
3700 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003703 /* Special case for empty strings */
3704 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003705 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003706
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707 e = p + PyString_GET_SIZE(self);
3708 cased = 0;
3709 previous_is_cased = 0;
3710 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003711 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712
3713 if (isupper(ch)) {
3714 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003715 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716 previous_is_cased = 1;
3717 cased = 1;
3718 }
3719 else if (islower(ch)) {
3720 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003721 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722 previous_is_cased = 1;
3723 cased = 1;
3724 }
3725 else
3726 previous_is_cased = 0;
3727 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729}
3730
3731
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003732PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003733"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734\n\
3735Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003736Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003737is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739static PyObject*
3740string_splitlines(PyStringObject *self, PyObject *args)
3741{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003742 register Py_ssize_t i;
3743 register Py_ssize_t j;
3744 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003745 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746 PyObject *list;
3747 PyObject *str;
3748 char *data;
3749
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003750 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751 return NULL;
3752
3753 data = PyString_AS_STRING(self);
3754 len = PyString_GET_SIZE(self);
3755
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003756 /* This does not use the preallocated list because splitlines is
3757 usually run with hundreds of newlines. The overhead of
3758 switching between PyList_SET_ITEM and append causes about a
3759 2-3% slowdown for that common case. A smarter implementation
3760 could move the if check out, so the SET_ITEMs are done first
3761 and the appends only done when the prealloc buffer is full.
3762 That's too much work for little gain.*/
3763
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764 list = PyList_New(0);
3765 if (!list)
3766 goto onError;
3767
3768 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003769 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003770
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771 /* Find a line and append it */
3772 while (i < len && data[i] != '\n' && data[i] != '\r')
3773 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774
3775 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003776 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 if (i < len) {
3778 if (data[i] == '\r' && i + 1 < len &&
3779 data[i+1] == '\n')
3780 i += 2;
3781 else
3782 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003783 if (keepends)
3784 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003785 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003786 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787 j = i;
3788 }
3789 if (j < len) {
3790 SPLIT_APPEND(data, j, len);
3791 }
3792
3793 return list;
3794
3795 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003796 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797 return NULL;
3798}
3799
3800#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003801#undef SPLIT_ADD
3802#undef MAX_PREALLOC
3803#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003805static PyObject *
3806string_getnewargs(PyStringObject *v)
3807{
3808 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3809}
3810
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003811
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003812static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003813string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003814 /* Counterparts of the obsolete stropmodule functions; except
3815 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003816 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3817 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003818 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003819 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3820 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003821 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3822 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3823 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3824 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3825 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3826 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3827 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003828 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3829 capitalize__doc__},
3830 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3831 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3832 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003833 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003834 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3835 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3836 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3837 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3838 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3839 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3840 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003841 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3842 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003843 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3844 startswith__doc__},
3845 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3846 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3847 swapcase__doc__},
3848 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3849 translate__doc__},
3850 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3851 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3852 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3853 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3854 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3855 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3856 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3857 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3858 expandtabs__doc__},
3859 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3860 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003861 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003862 {NULL, NULL} /* sentinel */
3863};
3864
Jeremy Hylton938ace62002-07-17 16:30:39 +00003865static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003866str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3867
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003868static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003869string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003870{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003871 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003872 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003873
Guido van Rossumae960af2001-08-30 03:11:59 +00003874 if (type != &PyString_Type)
3875 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003876 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3877 return NULL;
3878 if (x == NULL)
3879 return PyString_FromString("");
3880 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003881}
3882
Guido van Rossumae960af2001-08-30 03:11:59 +00003883static PyObject *
3884str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3885{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003886 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003887 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003888
3889 assert(PyType_IsSubtype(type, &PyString_Type));
3890 tmp = string_new(&PyString_Type, args, kwds);
3891 if (tmp == NULL)
3892 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003893 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003894 n = PyString_GET_SIZE(tmp);
3895 pnew = type->tp_alloc(type, n);
3896 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003897 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003898 ((PyStringObject *)pnew)->ob_shash =
3899 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003900 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003901 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003902 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003903 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003904}
3905
Guido van Rossumcacfc072002-05-24 19:01:59 +00003906static PyObject *
3907basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3908{
3909 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003910 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003911 return NULL;
3912}
3913
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003914static PyObject *
3915string_mod(PyObject *v, PyObject *w)
3916{
3917 if (!PyString_Check(v)) {
3918 Py_INCREF(Py_NotImplemented);
3919 return Py_NotImplemented;
3920 }
3921 return PyString_Format(v, w);
3922}
3923
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003924PyDoc_STRVAR(basestring_doc,
3925"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003926
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003927static PyNumberMethods string_as_number = {
3928 0, /*nb_add*/
3929 0, /*nb_subtract*/
3930 0, /*nb_multiply*/
3931 0, /*nb_divide*/
3932 string_mod, /*nb_remainder*/
3933};
3934
3935
Guido van Rossumcacfc072002-05-24 19:01:59 +00003936PyTypeObject PyBaseString_Type = {
3937 PyObject_HEAD_INIT(&PyType_Type)
3938 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003939 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003940 0,
3941 0,
3942 0, /* tp_dealloc */
3943 0, /* tp_print */
3944 0, /* tp_getattr */
3945 0, /* tp_setattr */
3946 0, /* tp_compare */
3947 0, /* tp_repr */
3948 0, /* tp_as_number */
3949 0, /* tp_as_sequence */
3950 0, /* tp_as_mapping */
3951 0, /* tp_hash */
3952 0, /* tp_call */
3953 0, /* tp_str */
3954 0, /* tp_getattro */
3955 0, /* tp_setattro */
3956 0, /* tp_as_buffer */
3957 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3958 basestring_doc, /* tp_doc */
3959 0, /* tp_traverse */
3960 0, /* tp_clear */
3961 0, /* tp_richcompare */
3962 0, /* tp_weaklistoffset */
3963 0, /* tp_iter */
3964 0, /* tp_iternext */
3965 0, /* tp_methods */
3966 0, /* tp_members */
3967 0, /* tp_getset */
3968 &PyBaseObject_Type, /* tp_base */
3969 0, /* tp_dict */
3970 0, /* tp_descr_get */
3971 0, /* tp_descr_set */
3972 0, /* tp_dictoffset */
3973 0, /* tp_init */
3974 0, /* tp_alloc */
3975 basestring_new, /* tp_new */
3976 0, /* tp_free */
3977};
3978
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003979PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003980"str(object) -> string\n\
3981\n\
3982Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003983If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003984
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003985PyTypeObject PyString_Type = {
3986 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003987 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003988 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003989 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003990 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00003991 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003992 (printfunc)string_print, /* tp_print */
3993 0, /* tp_getattr */
3994 0, /* tp_setattr */
3995 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00003996 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003997 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003998 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003999 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004000 (hashfunc)string_hash, /* tp_hash */
4001 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004002 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004003 PyObject_GenericGetAttr, /* tp_getattro */
4004 0, /* tp_setattro */
4005 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004006 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004007 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004008 string_doc, /* tp_doc */
4009 0, /* tp_traverse */
4010 0, /* tp_clear */
4011 (richcmpfunc)string_richcompare, /* tp_richcompare */
4012 0, /* tp_weaklistoffset */
4013 0, /* tp_iter */
4014 0, /* tp_iternext */
4015 string_methods, /* tp_methods */
4016 0, /* tp_members */
4017 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004018 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004019 0, /* tp_dict */
4020 0, /* tp_descr_get */
4021 0, /* tp_descr_set */
4022 0, /* tp_dictoffset */
4023 0, /* tp_init */
4024 0, /* tp_alloc */
4025 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004026 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004027};
4028
4029void
Fred Drakeba096332000-07-09 07:04:36 +00004030PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004031{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004032 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004033 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004034 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004035 if (w == NULL || !PyString_Check(*pv)) {
4036 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004037 *pv = NULL;
4038 return;
4039 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004040 v = string_concat((PyStringObject *) *pv, w);
4041 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004042 *pv = v;
4043}
4044
Guido van Rossum013142a1994-08-30 08:19:36 +00004045void
Fred Drakeba096332000-07-09 07:04:36 +00004046PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004047{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004048 PyString_Concat(pv, w);
4049 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004050}
4051
4052
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004053/* The following function breaks the notion that strings are immutable:
4054 it changes the size of a string. We get away with this only if there
4055 is only one module referencing the object. You can also think of it
4056 as creating a new string object and destroying the old one, only
4057 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004058 already be known to some other part of the code...
4059 Note that if there's not enough memory to resize the string, the original
4060 string object at *pv is deallocated, *pv is set to NULL, an "out of
4061 memory" exception is set, and -1 is returned. Else (on success) 0 is
4062 returned, and the value in *pv may or may not be the same as on input.
4063 As always, an extra byte is allocated for a trailing \0 byte (newsize
4064 does *not* include that), and a trailing \0 byte is stored.
4065*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004066
4067int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004068_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004069{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004070 register PyObject *v;
4071 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004072 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004073 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4074 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004075 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004076 Py_DECREF(v);
4077 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004078 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004079 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004080 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004081 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004082 _Py_ForgetReference(v);
4083 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004084 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004085 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004086 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004087 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004088 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004090 _Py_NewReference(*pv);
4091 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004092 sv->ob_size = newsize;
4093 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004094 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004095 return 0;
4096}
Guido van Rossume5372401993-03-16 12:15:04 +00004097
4098/* Helpers for formatstring */
4099
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004100Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004101getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004102{
Thomas Wouters977485d2006-02-16 15:59:12 +00004103 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004104 if (argidx < arglen) {
4105 (*p_argidx)++;
4106 if (arglen < 0)
4107 return args;
4108 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004109 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004110 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004111 PyErr_SetString(PyExc_TypeError,
4112 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004113 return NULL;
4114}
4115
/* Format flag bits.  Each mask records that the given flag character was
 * seen in a format specifier:
 *
 *      F_LJUST    '-'
 *      F_SIGN     '+'
 *      F_BLANK    ' '
 *      F_ALT      '#'
 *      F_ZERO     '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4128
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004129Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004130formatfloat(char *buf, size_t buflen, int flags,
4131 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004132{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004133 /* fmt = '%#.' + `prec` + `type`
4134 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004135 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004136 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004137 x = PyFloat_AsDouble(v);
4138 if (x == -1.0 && PyErr_Occurred()) {
4139 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004140 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004141 }
Guido van Rossume5372401993-03-16 12:15:04 +00004142 if (prec < 0)
4143 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004144 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4145 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004146 /* Worst case length calc to ensure no buffer overrun:
4147
4148 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004149 fmt = %#.<prec>g
4150 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004151 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004152 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004153
4154 'f' formats:
4155 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4156 len = 1 + 50 + 1 + prec = 52 + prec
4157
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004158 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004159 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004160
4161 */
4162 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4163 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004164 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004165 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004166 return -1;
4167 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004168 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4169 (flags&F_ALT) ? "#" : "",
4170 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004171 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004172 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004173}
4174
Tim Peters38fd5b62000-09-21 05:43:11 +00004175/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4176 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4177 * Python's regular ints.
4178 * Return value: a new PyString*, or NULL if error.
4179 * . *pbuf is set to point into it,
4180 * *plen set to the # of chars following that.
4181 * Caller must decref it when done using pbuf.
4182 * The string starting at *pbuf is of the form
4183 * "-"? ("0x" | "0X")? digit+
4184 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004185 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004186 * There will be at least prec digits, zero-filled on the left if
4187 * necessary to get that many.
4188 * val object to be converted
4189 * flags bitmask of format flags; only F_ALT is looked at
4190 * prec minimum number of digits; 0-fill on left if needed
4191 * type a character in [duoxX]; u acts the same as d
4192 *
4193 * CAUTION: o, x and X conversions on regular ints can never
4194 * produce a '-' sign, but can for Python's unbounded ints.
4195 */
4196PyObject*
4197_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4198 char **pbuf, int *plen)
4199{
4200 PyObject *result = NULL;
4201 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004202 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004203 int sign; /* 1 if '-', else 0 */
4204 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004205 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004206 int numdigits; /* len == numnondigits + numdigits */
4207 int numnondigits = 0;
4208
4209 switch (type) {
4210 case 'd':
4211 case 'u':
4212 result = val->ob_type->tp_str(val);
4213 break;
4214 case 'o':
4215 result = val->ob_type->tp_as_number->nb_oct(val);
4216 break;
4217 case 'x':
4218 case 'X':
4219 numnondigits = 2;
4220 result = val->ob_type->tp_as_number->nb_hex(val);
4221 break;
4222 default:
4223 assert(!"'type' not in [duoxX]");
4224 }
4225 if (!result)
4226 return NULL;
4227
Neal Norwitz56423e52006-08-13 18:11:08 +00004228 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004229 if (!buf) {
4230 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004231 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004232 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004233
Tim Peters38fd5b62000-09-21 05:43:11 +00004234 /* To modify the string in-place, there can only be one reference. */
4235 if (result->ob_refcnt != 1) {
4236 PyErr_BadInternalCall();
4237 return NULL;
4238 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004239 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004240 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004241 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4242 return NULL;
4243 }
4244 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004245 if (buf[len-1] == 'L') {
4246 --len;
4247 buf[len] = '\0';
4248 }
4249 sign = buf[0] == '-';
4250 numnondigits += sign;
4251 numdigits = len - numnondigits;
4252 assert(numdigits > 0);
4253
Tim Petersfff53252001-04-12 18:38:48 +00004254 /* Get rid of base marker unless F_ALT */
4255 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004256 /* Need to skip 0x, 0X or 0. */
4257 int skipped = 0;
4258 switch (type) {
4259 case 'o':
4260 assert(buf[sign] == '0');
4261 /* If 0 is only digit, leave it alone. */
4262 if (numdigits > 1) {
4263 skipped = 1;
4264 --numdigits;
4265 }
4266 break;
4267 case 'x':
4268 case 'X':
4269 assert(buf[sign] == '0');
4270 assert(buf[sign + 1] == 'x');
4271 skipped = 2;
4272 numnondigits -= 2;
4273 break;
4274 }
4275 if (skipped) {
4276 buf += skipped;
4277 len -= skipped;
4278 if (sign)
4279 buf[0] = '-';
4280 }
4281 assert(len == numnondigits + numdigits);
4282 assert(numdigits > 0);
4283 }
4284
4285 /* Fill with leading zeroes to meet minimum width. */
4286 if (prec > numdigits) {
4287 PyObject *r1 = PyString_FromStringAndSize(NULL,
4288 numnondigits + prec);
4289 char *b1;
4290 if (!r1) {
4291 Py_DECREF(result);
4292 return NULL;
4293 }
4294 b1 = PyString_AS_STRING(r1);
4295 for (i = 0; i < numnondigits; ++i)
4296 *b1++ = *buf++;
4297 for (i = 0; i < prec - numdigits; i++)
4298 *b1++ = '0';
4299 for (i = 0; i < numdigits; i++)
4300 *b1++ = *buf++;
4301 *b1 = '\0';
4302 Py_DECREF(result);
4303 result = r1;
4304 buf = PyString_AS_STRING(result);
4305 len = numnondigits + prec;
4306 }
4307
4308 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004309 if (type == 'X') {
4310 /* Need to convert all lower case letters to upper case.
4311 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004312 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004313 if (buf[i] >= 'a' && buf[i] <= 'x')
4314 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004315 }
4316 *pbuf = buf;
4317 *plen = len;
4318 return result;
4319}
4320
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004321Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004322formatint(char *buf, size_t buflen, int flags,
4323 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004324{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004325 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004326 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4327 + 1 + 1 = 24 */
4328 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004329 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004330 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004331
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004332 x = PyInt_AsLong(v);
4333 if (x == -1 && PyErr_Occurred()) {
4334 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004335 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004336 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004337 if (x < 0 && type == 'u') {
4338 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004339 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004340 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4341 sign = "-";
4342 else
4343 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004344 if (prec < 0)
4345 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004346
4347 if ((flags & F_ALT) &&
4348 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004349 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004350 * of issues that cause pain:
4351 * - when 0 is being converted, the C standard leaves off
4352 * the '0x' or '0X', which is inconsistent with other
4353 * %#x/%#X conversions and inconsistent with Python's
4354 * hex() function
4355 * - there are platforms that violate the standard and
4356 * convert 0 with the '0x' or '0X'
4357 * (Metrowerks, Compaq Tru64)
4358 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004359 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004360 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004361 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004362 * We can achieve the desired consistency by inserting our
4363 * own '0x' or '0X' prefix, and substituting %x/%X in place
4364 * of %#x/%#X.
4365 *
4366 * Note that this is the same approach as used in
4367 * formatint() in unicodeobject.c
4368 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004369 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4370 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004371 }
4372 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004373 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4374 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004375 prec, type);
4376 }
4377
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004378 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4379 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004380 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004381 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004382 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004383 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004384 return -1;
4385 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004386 if (sign[0])
4387 PyOS_snprintf(buf, buflen, fmt, -x);
4388 else
4389 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004390 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004391}
4392
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004393Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004394formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004395{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004396 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004397 if (PyString_Check(v)) {
4398 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004399 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004400 }
4401 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004402 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004403 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004404 }
4405 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004406 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004407}
4408
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine does its own bounds checking so the buffer cannot overflow, but
   a better solution may be to malloc a buffer of the appropriate size for
   each format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004418
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004419PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004420PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004421{
4422 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004423 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004424 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004425 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004426 PyObject *result, *orig_args;
4427#ifdef Py_USING_UNICODE
4428 PyObject *v, *w;
4429#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004430 PyObject *dict = NULL;
4431 if (format == NULL || !PyString_Check(format) || args == NULL) {
4432 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004433 return NULL;
4434 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004435 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004436 fmt = PyString_AS_STRING(format);
4437 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004438 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004439 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004440 if (result == NULL)
4441 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004442 res = PyString_AsString(result);
4443 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004444 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004445 argidx = 0;
4446 }
4447 else {
4448 arglen = -1;
4449 argidx = -2;
4450 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004451 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4452 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004453 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004454 while (--fmtcnt >= 0) {
4455 if (*fmt != '%') {
4456 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004457 rescnt = fmtcnt + 100;
4458 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004459 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004460 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004461 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004462 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004463 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004464 }
4465 *res++ = *fmt++;
4466 }
4467 else {
4468 /* Got a format specifier */
4469 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004470 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004471 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004472 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004473 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004474 PyObject *v = NULL;
4475 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004476 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004477 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004478 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004479 char formatbuf[FORMATBUFLEN];
4480 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004481#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004482 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004483 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004484#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004485
Guido van Rossumda9c2711996-12-05 21:58:58 +00004486 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004487 if (*fmt == '(') {
4488 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004489 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004490 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004491 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004492
4493 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004494 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004495 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004496 goto error;
4497 }
4498 ++fmt;
4499 --fmtcnt;
4500 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004501 /* Skip over balanced parentheses */
4502 while (pcount > 0 && --fmtcnt >= 0) {
4503 if (*fmt == ')')
4504 --pcount;
4505 else if (*fmt == '(')
4506 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004507 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004508 }
4509 keylen = fmt - keystart - 1;
4510 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004512 "incomplete format key");
4513 goto error;
4514 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004515 key = PyString_FromStringAndSize(keystart,
4516 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004517 if (key == NULL)
4518 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004519 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004520 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004521 args_owned = 0;
4522 }
4523 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004524 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004525 if (args == NULL) {
4526 goto error;
4527 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004528 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004529 arglen = -1;
4530 argidx = -2;
4531 }
Guido van Rossume5372401993-03-16 12:15:04 +00004532 while (--fmtcnt >= 0) {
4533 switch (c = *fmt++) {
4534 case '-': flags |= F_LJUST; continue;
4535 case '+': flags |= F_SIGN; continue;
4536 case ' ': flags |= F_BLANK; continue;
4537 case '#': flags |= F_ALT; continue;
4538 case '0': flags |= F_ZERO; continue;
4539 }
4540 break;
4541 }
4542 if (c == '*') {
4543 v = getnextarg(args, arglen, &argidx);
4544 if (v == NULL)
4545 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004546 if (!PyInt_Check(v)) {
4547 PyErr_SetString(PyExc_TypeError,
4548 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004549 goto error;
4550 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004551 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004552 if (width < 0) {
4553 flags |= F_LJUST;
4554 width = -width;
4555 }
Guido van Rossume5372401993-03-16 12:15:04 +00004556 if (--fmtcnt >= 0)
4557 c = *fmt++;
4558 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004559 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004560 width = c - '0';
4561 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004562 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004563 if (!isdigit(c))
4564 break;
4565 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004566 PyErr_SetString(
4567 PyExc_ValueError,
4568 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004569 goto error;
4570 }
4571 width = width*10 + (c - '0');
4572 }
4573 }
4574 if (c == '.') {
4575 prec = 0;
4576 if (--fmtcnt >= 0)
4577 c = *fmt++;
4578 if (c == '*') {
4579 v = getnextarg(args, arglen, &argidx);
4580 if (v == NULL)
4581 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004582 if (!PyInt_Check(v)) {
4583 PyErr_SetString(
4584 PyExc_TypeError,
4585 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004586 goto error;
4587 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004588 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004589 if (prec < 0)
4590 prec = 0;
4591 if (--fmtcnt >= 0)
4592 c = *fmt++;
4593 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004594 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004595 prec = c - '0';
4596 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004597 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004598 if (!isdigit(c))
4599 break;
4600 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004601 PyErr_SetString(
4602 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004603 "prec too big");
4604 goto error;
4605 }
4606 prec = prec*10 + (c - '0');
4607 }
4608 }
4609 } /* prec */
4610 if (fmtcnt >= 0) {
4611 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004612 if (--fmtcnt >= 0)
4613 c = *fmt++;
4614 }
4615 }
4616 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004617 PyErr_SetString(PyExc_ValueError,
4618 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004619 goto error;
4620 }
4621 if (c != '%') {
4622 v = getnextarg(args, arglen, &argidx);
4623 if (v == NULL)
4624 goto error;
4625 }
4626 sign = 0;
4627 fill = ' ';
4628 switch (c) {
4629 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004630 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004631 len = 1;
4632 break;
4633 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004634#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004635 if (PyUnicode_Check(v)) {
4636 fmt = fmt_start;
4637 argidx = argidx_start;
4638 goto unicode;
4639 }
Georg Brandld45014b2005-10-01 17:06:00 +00004640#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004641 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004642#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004643 if (temp != NULL && PyUnicode_Check(temp)) {
4644 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004645 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004646 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004647 goto unicode;
4648 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004649#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004650 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004651 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004652 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004653 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004654 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004655 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004656 if (!PyString_Check(temp)) {
4657 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004658 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004659 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004660 goto error;
4661 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004662 pbuf = PyString_AS_STRING(temp);
4663 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004664 if (prec >= 0 && len > prec)
4665 len = prec;
4666 break;
4667 case 'i':
4668 case 'd':
4669 case 'u':
4670 case 'o':
4671 case 'x':
4672 case 'X':
4673 if (c == 'i')
4674 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004675 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004676 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004677 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004678 prec, c, &pbuf, &ilen);
4679 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004680 if (!temp)
4681 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004682 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004683 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004684 else {
4685 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004686 len = formatint(pbuf,
4687 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004688 flags, prec, c, v);
4689 if (len < 0)
4690 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004691 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004692 }
4693 if (flags & F_ZERO)
4694 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004695 break;
4696 case 'e':
4697 case 'E':
4698 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004699 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004700 case 'g':
4701 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004702 if (c == 'F')
4703 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004704 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004705 len = formatfloat(pbuf, sizeof(formatbuf),
4706 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004707 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004708 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004709 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004710 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004711 fill = '0';
4712 break;
4713 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004714#ifdef Py_USING_UNICODE
4715 if (PyUnicode_Check(v)) {
4716 fmt = fmt_start;
4717 argidx = argidx_start;
4718 goto unicode;
4719 }
4720#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004721 pbuf = formatbuf;
4722 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004723 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004724 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004725 break;
4726 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004727 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004728 "unsupported format character '%c' (0x%x) "
4729 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004730 c, c,
4731 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004732 goto error;
4733 }
4734 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004735 if (*pbuf == '-' || *pbuf == '+') {
4736 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004737 len--;
4738 }
4739 else if (flags & F_SIGN)
4740 sign = '+';
4741 else if (flags & F_BLANK)
4742 sign = ' ';
4743 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004744 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004745 }
4746 if (width < len)
4747 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004748 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004749 reslen -= rescnt;
4750 rescnt = width + fmtcnt + 100;
4751 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004752 if (reslen < 0) {
4753 Py_DECREF(result);
4754 return PyErr_NoMemory();
4755 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004756 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004757 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004758 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004759 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004760 }
4761 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004762 if (fill != ' ')
4763 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004764 rescnt--;
4765 if (width > len)
4766 width--;
4767 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004768 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4769 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004770 assert(pbuf[1] == c);
4771 if (fill != ' ') {
4772 *res++ = *pbuf++;
4773 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004774 }
Tim Petersfff53252001-04-12 18:38:48 +00004775 rescnt -= 2;
4776 width -= 2;
4777 if (width < 0)
4778 width = 0;
4779 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004780 }
4781 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004782 do {
4783 --rescnt;
4784 *res++ = fill;
4785 } while (--width > len);
4786 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004787 if (fill == ' ') {
4788 if (sign)
4789 *res++ = sign;
4790 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004791 (c == 'x' || c == 'X')) {
4792 assert(pbuf[0] == '0');
4793 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004794 *res++ = *pbuf++;
4795 *res++ = *pbuf++;
4796 }
4797 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004798 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004799 res += len;
4800 rescnt -= len;
4801 while (--width >= len) {
4802 --rescnt;
4803 *res++ = ' ';
4804 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004805 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004806 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004807 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004808 goto error;
4809 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004810 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004811 } /* '%' */
4812 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004813 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004814 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004815 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004816 goto error;
4817 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004818 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004819 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004820 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004821 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004822 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004823
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004824#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004825 unicode:
4826 if (args_owned) {
4827 Py_DECREF(args);
4828 args_owned = 0;
4829 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004830 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004831 if (PyTuple_Check(orig_args) && argidx > 0) {
4832 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004833 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004834 v = PyTuple_New(n);
4835 if (v == NULL)
4836 goto error;
4837 while (--n >= 0) {
4838 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4839 Py_INCREF(w);
4840 PyTuple_SET_ITEM(v, n, w);
4841 }
4842 args = v;
4843 } else {
4844 Py_INCREF(orig_args);
4845 args = orig_args;
4846 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004847 args_owned = 1;
4848 /* Take what we have of the result and let the Unicode formatting
4849 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004850 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004851 if (_PyString_Resize(&result, rescnt))
4852 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004853 fmtcnt = PyString_GET_SIZE(format) - \
4854 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004855 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4856 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004857 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004858 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004859 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004860 if (v == NULL)
4861 goto error;
4862 /* Paste what we have (result) to what the Unicode formatting
4863 function returned (v) and return the result (or error) */
4864 w = PyUnicode_Concat(result, v);
4865 Py_DECREF(result);
4866 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004867 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004868 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004869#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004870
Guido van Rossume5372401993-03-16 12:15:04 +00004871 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004872 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004873 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004874 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004875 }
Guido van Rossume5372401993-03-16 12:15:04 +00004876 return NULL;
4877}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004878
Guido van Rossum2a61e741997-01-18 07:55:05 +00004879void
Fred Drakeba096332000-07-09 07:04:36 +00004880PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004881{
4882 register PyStringObject *s = (PyStringObject *)(*p);
4883 PyObject *t;
4884 if (s == NULL || !PyString_Check(s))
4885 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004886 /* If it's a string subclass, we don't really know what putting
4887 it in the interned dict might do. */
4888 if (!PyString_CheckExact(s))
4889 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004890 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004891 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004892 if (interned == NULL) {
4893 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004894 if (interned == NULL) {
4895 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004896 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004897 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004898 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004899 t = PyDict_GetItem(interned, (PyObject *)s);
4900 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004901 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004902 Py_DECREF(*p);
4903 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004904 return;
4905 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004906
Armin Rigo79f7ad22004-08-07 19:27:39 +00004907 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004908 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004909 return;
4910 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004911 /* The two references in interned are not counted by refcnt.
4912 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004913 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004914 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004915}
4916
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004917void
4918PyString_InternImmortal(PyObject **p)
4919{
4920 PyString_InternInPlace(p);
4921 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4922 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4923 Py_INCREF(*p);
4924 }
4925}
4926
Guido van Rossum2a61e741997-01-18 07:55:05 +00004927
4928PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004929PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004930{
4931 PyObject *s = PyString_FromString(cp);
4932 if (s == NULL)
4933 return NULL;
4934 PyString_InternInPlace(&s);
4935 return s;
4936}
4937
Guido van Rossum8cf04761997-08-02 02:57:45 +00004938void
Fred Drakeba096332000-07-09 07:04:36 +00004939PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004940{
4941 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004942 for (i = 0; i < UCHAR_MAX + 1; i++) {
4943 Py_XDECREF(characters[i]);
4944 characters[i] = NULL;
4945 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004946 Py_XDECREF(nullstring);
4947 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004948}
Barry Warsawa903ad982001-02-23 16:40:48 +00004949
Barry Warsawa903ad982001-02-23 16:40:48 +00004950void _Py_ReleaseInternedStrings(void)
4951{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004952 PyObject *keys;
4953 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004954 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004955
4956 if (interned == NULL || !PyDict_Check(interned))
4957 return;
4958 keys = PyDict_Keys(interned);
4959 if (keys == NULL || !PyList_Check(keys)) {
4960 PyErr_Clear();
4961 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004962 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004963
4964 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4965 detector, interned strings are not forcibly deallocated; rather, we
4966 give them their stolen references back, and then clear and DECREF
4967 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004968
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004969 fprintf(stderr, "releasing interned strings\n");
4970 n = PyList_GET_SIZE(keys);
4971 for (i = 0; i < n; i++) {
4972 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4973 switch (s->ob_sstate) {
4974 case SSTATE_NOT_INTERNED:
4975 /* XXX Shouldn't happen */
4976 break;
4977 case SSTATE_INTERNED_IMMORTAL:
4978 s->ob_refcnt += 1;
4979 break;
4980 case SSTATE_INTERNED_MORTAL:
4981 s->ob_refcnt += 2;
4982 break;
4983 default:
4984 Py_FatalError("Inconsistent interned string state.");
4985 }
4986 s->ob_sstate = SSTATE_NOT_INTERNED;
4987 }
4988 Py_DECREF(keys);
4989 PyDict_Clear(interned);
4990 Py_DECREF(interned);
4991 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004992}