blob: 6673f670a0ab549cc2da69dcc9f700dd73180fa3 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Statistics counters: incremented each time the shared empty string
   (null_strings) or a shared one-character string (one_strings) is
   handed out instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Cache of shared one-character string objects, indexed by byte value.
   Entries are NULL until the corresponding string is first created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string object; NULL until it is first created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000619 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000620 c = (c<<3) + *s++ - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000621 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
630 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
651 }
652 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000653 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000666 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 }
668#ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000673 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000674 "Unicode escapes not legal "
675 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#endif
679 default:
680 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 }
685 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000686 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
692}
693
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000694/* -------------------------------------------------------------------- */
695/* object api */
696
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698string_getsize(register PyObject *op)
699{
700 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
705}
706
707static /*const*/ char *
708string_getbuffer(register PyObject *op)
709{
710 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000711 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
715}
716
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000718PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 if (!PyString_Check(op))
721 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723}
724
725/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000726PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728 if (!PyString_Check(op))
729 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731}
732
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733int
734PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737{
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
741 }
742
743 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
749 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000750 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#endif
752 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
755 "%.200s found", obj->ob_type->tp_name);
756 return -1;
757 }
758 }
759
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
767 }
768 return 0;
769}
770
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000772/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
/* Configuration for the stringlib headers included below: character
   type, comparison routine, length/constructor/data accessors and the
   empty-string object for 8-bit strings.
   NOTE(review): the exact contract of each macro is defined by the
   stringlib headers, which are not visible here -- confirm there. */
#define STRINGLIB_CHAR char

#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING

#define STRINGLIB_EMPTY nullstring

#include "stringlib/fastsearch.h"

#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790static int
Fred Drakeba096332000-07-09 07:04:36 +0000791string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000793 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000796
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000797 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 if (flags & Py_PRINT_RAW) {
Armin Rigo4b63c212006-10-04 11:44:06 +0000809 char *data = op->ob_sval;
810 Py_ssize_t size = op->ob_size;
811 while (size > INT_MAX) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory aligment issues.
815 */
816 const int chunk_size = INT_MAX & ~0x3FFF;
817 fwrite(data, 1, chunk_size, fp);
818 data += chunk_size;
819 size -= chunk_size;
820 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#ifdef __VMS
Armin Rigo4b63c212006-10-04 11:44:06 +0000822 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#else
Armin Rigo4b63c212006-10-04 11:44:06 +0000824 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000825#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000826 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828
Thomas Wouters7e474022000-07-16 12:04:32 +0000829 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000831 if (memchr(op->ob_sval, '\'', op->ob_size) &&
832 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 quote = '"';
834
835 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 for (i = 0; i < op->ob_size; i++) {
837 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000838 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000844 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 fprintf(fp, "\\r");
846 else if (c < ' ' || c >= 0x7f)
847 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000848 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000849 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000852 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853}
854
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000855PyObject *
856PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000859 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyObject *v;
Armin Rigo4b63c212006-10-04 11:44:06 +0000861 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000862 PyErr_SetString(PyExc_OverflowError,
863 "string is too large to make repr");
Guido van Rossume6a6f392007-11-07 01:19:49 +0000864 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000865 }
866 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000868 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 }
870 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000871 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872 register char c;
873 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 int quote;
875
Thomas Wouters7e474022000-07-16 12:04:32 +0000876 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000877 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000878 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000879 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000880 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 quote = '"';
882
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000884 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000885 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000886 /* There's at least enough room for a hex escape
887 and a closing quote. */
888 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000889 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000890 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000892 else if (c == '\t')
893 *p++ = '\\', *p++ = 't';
894 else if (c == '\n')
895 *p++ = '\\', *p++ = 'n';
896 else if (c == '\r')
897 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000898 else if (c < ' ' || c >= 0x7f) {
899 /* For performance, we don't want to call
900 PyOS_snprintf here (extra layers of
901 function call). */
902 sprintf(p, "\\x%02x", c & 0xff);
903 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000904 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000905 else
906 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000908 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000909 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000912 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915}
916
Guido van Rossum189f1df2001-05-01 16:51:53 +0000917static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000918string_repr(PyObject *op)
919{
920 return PyString_Repr(op, 1);
921}
922
923static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000924string_str(PyObject *s)
925{
Tim Petersc9933152001-10-16 20:18:24 +0000926 assert(PyString_Check(s));
927 if (PyString_CheckExact(s)) {
928 Py_INCREF(s);
929 return s;
930 }
931 else {
932 /* Subtype -- return genuine string with the same value. */
933 PyStringObject *t = (PyStringObject *) s;
934 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
935 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000936}
937
Martin v. Löwis18e16552006-02-15 17:27:45 +0000938static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000939string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940{
941 return a->ob_size;
942}
943
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000945string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946{
Andrew Dalke598710c2006-05-25 18:18:39 +0000947 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000948 register PyStringObject *op;
949 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000951 if (PyUnicode_Check(bb))
952 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000953#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000954 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000955 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000956 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 return NULL;
958 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000959#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000960 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000961 if ((a->ob_size == 0 || b->ob_size == 0) &&
962 PyString_CheckExact(a) && PyString_CheckExact(b)) {
963 if (a->ob_size == 0) {
964 Py_INCREF(bb);
965 return bb;
966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967 Py_INCREF(a);
968 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000969 }
970 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000971 if (size < 0) {
972 PyErr_SetString(PyExc_OverflowError,
973 "strings are too large to concat");
974 return NULL;
975 }
976
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000977 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000978 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000979 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000981 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000982 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000983 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000984 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
985 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000986 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988#undef b
989}
990
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000992string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000994 register Py_ssize_t i;
995 register Py_ssize_t j;
996 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000998 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 if (n < 0)
1000 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001001 /* watch out for overflows: the size can overflow int,
1002 * and the # of bytes needed can overflow size_t
1003 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001004 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001005 if (n && size / n != a->ob_size) {
1006 PyErr_SetString(PyExc_OverflowError,
1007 "repeated string is too long");
1008 return NULL;
1009 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001010 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011 Py_INCREF(a);
1012 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001013 }
Tim Peterse7c05322004-06-27 17:24:49 +00001014 nbytes = (size_t)size;
1015 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001016 PyErr_SetString(PyExc_OverflowError,
1017 "repeated string is too long");
1018 return NULL;
1019 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001021 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001022 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001024 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001025 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001026 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001027 op->ob_sval[size] = '\0';
1028 if (a->ob_size == 1 && n > 0) {
1029 memset(op->ob_sval, a->ob_sval[0] , n);
1030 return (PyObject *) op;
1031 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001032 i = 0;
1033 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001034 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001035 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001036 }
1037 while (i < size) {
1038 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001039 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 i += j;
1041 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043}
1044
1045/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1046
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001047static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001048string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001049 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001050 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051{
1052 if (i < 0)
1053 i = 0;
1054 if (j < 0)
1055 j = 0; /* Avoid signed/unsigned bug in next line */
1056 if (j > a->ob_size)
1057 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001058 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1059 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001060 Py_INCREF(a);
1061 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001062 }
1063 if (j < i)
1064 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001065 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001066}
1067
Guido van Rossum9284a572000-03-07 15:53:43 +00001068static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001069string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001070{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001071 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001072#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001073 if (PyUnicode_Check(sub_obj))
1074 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001075#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001076 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001077 PyErr_SetString(PyExc_TypeError,
1078 "'in <string>' requires string as left operand");
1079 return -1;
1080 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001081 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001082
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001083 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001084}
1085
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001086static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001087string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001089 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001092 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093 return NULL;
1094 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001095 pchar = a->ob_sval[i];
1096 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001097 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001099 else {
1100#ifdef COUNT_ALLOCS
1101 one_strings++;
1102#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001103 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001104 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001105 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001106}
1107
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108static PyObject*
1109string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112 Py_ssize_t len_a, len_b;
1113 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114 PyObject *result;
1115
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001116 /* Make sure both arguments are strings. */
1117 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001118 result = Py_NotImplemented;
1119 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001120 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001121 if (a == b) {
1122 switch (op) {
1123 case Py_EQ:case Py_LE:case Py_GE:
1124 result = Py_True;
1125 goto out;
1126 case Py_NE:case Py_LT:case Py_GT:
1127 result = Py_False;
1128 goto out;
1129 }
1130 }
1131 if (op == Py_EQ) {
1132 /* Supporting Py_NE here as well does not save
1133 much time, since Py_NE is rarely used. */
1134 if (a->ob_size == b->ob_size
1135 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001136 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001137 a->ob_size) == 0)) {
1138 result = Py_True;
1139 } else {
1140 result = Py_False;
1141 }
1142 goto out;
1143 }
1144 len_a = a->ob_size; len_b = b->ob_size;
1145 min_len = (len_a < len_b) ? len_a : len_b;
1146 if (min_len > 0) {
1147 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1148 if (c==0)
1149 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1150 }else
1151 c = 0;
1152 if (c == 0)
1153 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1154 switch (op) {
1155 case Py_LT: c = c < 0; break;
1156 case Py_LE: c = c <= 0; break;
1157 case Py_EQ: assert(0); break; /* unreachable */
1158 case Py_NE: c = c != 0; break;
1159 case Py_GT: c = c > 0; break;
1160 case Py_GE: c = c >= 0; break;
1161 default:
1162 result = Py_NotImplemented;
1163 goto out;
1164 }
1165 result = c ? Py_True : Py_False;
1166 out:
1167 Py_INCREF(result);
1168 return result;
1169}
1170
1171int
1172_PyString_Eq(PyObject *o1, PyObject *o2)
1173{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001174 PyStringObject *a = (PyStringObject*) o1;
1175 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001176 return a->ob_size == b->ob_size
1177 && *a->ob_sval == *b->ob_sval
1178 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001179}
1180
Guido van Rossum9bfef441993-03-29 10:43:31 +00001181static long
Fred Drakeba096332000-07-09 07:04:36 +00001182string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001183{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001184 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001185 register unsigned char *p;
1186 register long x;
1187
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001188 if (a->ob_shash != -1)
1189 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001190 len = a->ob_size;
1191 p = (unsigned char *) a->ob_sval;
1192 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001193 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001194 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001195 x ^= a->ob_size;
1196 if (x == -1)
1197 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001198 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001199 return x;
1200}
1201
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001202static PyObject*
1203string_subscript(PyStringObject* self, PyObject* item)
1204{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001205 if (PyIndex_Check(item)) {
1206 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001207 if (i == -1 && PyErr_Occurred())
1208 return NULL;
1209 if (i < 0)
1210 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001211 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 }
1213 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001214 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 char* source_buf;
1216 char* result_buf;
1217 PyObject* result;
1218
Tim Petersae1d0c92006-03-17 03:29:34 +00001219 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 PyString_GET_SIZE(self),
1221 &start, &stop, &step, &slicelength) < 0) {
1222 return NULL;
1223 }
1224
1225 if (slicelength <= 0) {
1226 return PyString_FromStringAndSize("", 0);
1227 }
1228 else {
1229 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001230 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001231 if (result_buf == NULL)
1232 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001233
Tim Petersae1d0c92006-03-17 03:29:34 +00001234 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001235 cur += step, i++) {
1236 result_buf[i] = source_buf[cur];
1237 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001238
1239 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001240 slicelength);
1241 PyMem_Free(result_buf);
1242 return result;
1243 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001244 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001245 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001246 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001247 "string indices must be integers");
1248 return NULL;
1249 }
1250}
1251
Martin v. Löwis18e16552006-02-15 17:27:45 +00001252static Py_ssize_t
1253string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254{
1255 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001256 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001257 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258 return -1;
1259 }
1260 *ptr = (void *)self->ob_sval;
1261 return self->ob_size;
1262}
1263
Martin v. Löwis18e16552006-02-15 17:27:45 +00001264static Py_ssize_t
1265string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001266{
Guido van Rossum045e6881997-09-08 18:30:11 +00001267 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001268 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001269 return -1;
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( lenp )
1276 *lenp = self->ob_size;
1277 return 1;
1278}
1279
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280static Py_ssize_t
1281string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001282{
1283 if ( index != 0 ) {
1284 PyErr_SetString(PyExc_SystemError,
1285 "accessing non-existent string segment");
1286 return -1;
1287 }
1288 *ptr = self->ob_sval;
1289 return self->ob_size;
1290}
1291
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001292static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001294 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001295 (ssizeargfunc)string_repeat, /*sq_repeat*/
1296 (ssizeargfunc)string_item, /*sq_item*/
1297 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001298 0, /*sq_ass_item*/
1299 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001300 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001301};
1302
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001303static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001304 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001305 (binaryfunc)string_subscript,
1306 0,
1307};
1308
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001309static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001310 (readbufferproc)string_buffer_getreadbuf,
1311 (writebufferproc)string_buffer_getwritebuf,
1312 (segcountproc)string_buffer_getsegcount,
1313 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001314};
1315
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001316
1317
/* Strip variants; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for a strip variant: the format string with its leading
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001326
Andrew Dalke525eab32006-05-26 14:00:45 +00001327
/* True if the `length' bytes of `target' starting at `offset' equal
   the first `length' bytes of `pattern'.  The first and last bytes are
   compared directly before memcmp() is used for the interior.

   Don't call if length < 2 (the interior length, length-2, would be
   negative).  Every argument is parenthesized so that expression
   arguments expand correctly; arguments are evaluated more than once,
   so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                  \
    ((target)[(offset)] == (pattern)[0] &&                                \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&      \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1333
1334
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit' splits: one more
   element than the number of splits (5 splits gives 6 elements), capped
   at MAX_PREALLOC.  The argument is parenthesized so that expression
   arguments expand correctly; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1347
/* Both macros below expect the expanding function to provide:
     PyObject *str    -- scratch variable for the new substring
     PyObject *list   -- the result list
     onError:         -- a label that releases the list and fails
   SPLIT_ADD additionally requires a Py_ssize_t `count'. */

/* Append data[left:right] to `list' as a new string object.
   Note: expands to an unbraced if/else sequence, so it must be used as
   a full statement. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as element number `count' of `list' and
   increment `count'.  The first MAX_PREALLOC elements are written
   straight into the preallocated slots; later ones are appended. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001376
/* Always force the list to the expected size: set its ob_size to the
   `count' variable of the expanding function.  The argument is
   parenthesized so the cast applies to the whole expression. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)
Andrew Dalke525eab32006-05-26 14:00:45 +00001379
/* Scanning helpers for whitespace splitting.  `i' must be a modifiable
   index variable; it is advanced (SKIP_*) or moved back (RSKIP_*)
   while s[i] is / is not whitespace and the index stays within
   [0, len).  Arguments are parenthesized so expression arguments for
   `s' and `len' expand correctly. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1384
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001385Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001386split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387{
Andrew Dalke525eab32006-05-26 14:00:45 +00001388 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001389 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001390 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391
1392 if (list == NULL)
1393 return NULL;
1394
Andrew Dalke02758d62006-05-26 15:21:01 +00001395 i = j = 0;
1396
1397 while (maxsplit-- > 0) {
1398 SKIP_SPACE(s, i, len);
1399 if (i==len) break;
1400 j = i; i++;
1401 SKIP_NONSPACE(s, i, len);
1402 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001404
1405 if (i < len) {
1406 /* Only occurs when maxsplit was reached */
1407 /* Skip any remaining whitespace and copy to end of string */
1408 SKIP_SPACE(s, i, len);
1409 if (i != len)
1410 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001412 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 Py_DECREF(list);
1416 return NULL;
1417}
1418
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001419Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001420split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421{
Andrew Dalke525eab32006-05-26 14:00:45 +00001422 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001424 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425
1426 if (list == NULL)
1427 return NULL;
1428
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001429 i = j = 0;
1430 while ((j < len) && (maxcount-- > 0)) {
1431 for(; j<len; j++) {
1432 /* I found that using memchr makes no difference */
1433 if (s[j] == ch) {
1434 SPLIT_ADD(s, i, j);
1435 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001437 }
1438 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001439 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001440 if (i <= len) {
1441 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001443 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444 return list;
1445
1446 onError:
1447 Py_DECREF(list);
1448 return NULL;
1449}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001451PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452"S.split([sep [,maxsplit]]) -> list of strings\n\
1453\n\
1454Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001456splits are done. If sep is not specified or is None, any\n\
1457whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458
1459static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001460string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001462 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001463 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001465 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001466#ifdef USE_FAST
1467 Py_ssize_t pos;
1468#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469
Martin v. Löwis9c830762006-04-13 08:37:17 +00001470 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001473 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001476 if (PyString_Check(subobj)) {
1477 sub = PyString_AS_STRING(subobj);
1478 n = PyString_GET_SIZE(subobj);
1479 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001480#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 else if (PyUnicode_Check(subobj))
1482 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001483#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001484 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1485 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487 if (n == 0) {
1488 PyErr_SetString(PyExc_ValueError, "empty separator");
1489 return NULL;
1490 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001491 else if (n == 1)
1492 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493
Andrew Dalke525eab32006-05-26 14:00:45 +00001494 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 if (list == NULL)
1496 return NULL;
1497
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001498#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001500 while (maxsplit-- > 0) {
1501 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1502 if (pos < 0)
1503 break;
1504 j = i+pos;
1505 SPLIT_ADD(s, i, j);
1506 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001508#else
1509 i = j = 0;
1510 while ((j+n <= len) && (maxsplit-- > 0)) {
1511 for (; j+n <= len; j++) {
1512 if (Py_STRING_MATCH(s, j, sub, n)) {
1513 SPLIT_ADD(s, i, j);
1514 i = j = j + n;
1515 break;
1516 }
1517 }
1518 }
1519#endif
1520 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001521 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return list;
1523
Andrew Dalke525eab32006-05-26 14:00:45 +00001524 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525 Py_DECREF(list);
1526 return NULL;
1527}
1528
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001529PyDoc_STRVAR(partition__doc__,
1530"S.partition(sep) -> (head, sep, tail)\n\
1531\n\
1532Searches for the separator sep in S, and returns the part before it,\n\
1533the separator itself, and the part after it. If the separator is not\n\
1534found, returns S and two empty strings.");
1535
1536static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001537string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001538{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001539 const char *sep;
1540 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001541
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001542 if (PyString_Check(sep_obj)) {
1543 sep = PyString_AS_STRING(sep_obj);
1544 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001545 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001546#ifdef Py_USING_UNICODE
1547 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001548 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001549#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001550 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001551 return NULL;
1552
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001553 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001554 (PyObject*) self,
1555 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1556 sep_obj, sep, sep_len
1557 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001558}
1559
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001560PyDoc_STRVAR(rpartition__doc__,
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001561"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001562\n\
1563Searches for the separator sep in S, starting at the end of S, and returns\n\
1564the part before it, the separator itself, and the part after it. If the\n\
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001565separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001566
1567static PyObject *
1568string_rpartition(PyStringObject *self, PyObject *sep_obj)
1569{
1570 const char *sep;
1571 Py_ssize_t sep_len;
1572
1573 if (PyString_Check(sep_obj)) {
1574 sep = PyString_AS_STRING(sep_obj);
1575 sep_len = PyString_GET_SIZE(sep_obj);
1576 }
1577#ifdef Py_USING_UNICODE
1578 else if (PyUnicode_Check(sep_obj))
1579 return PyUnicode_Partition((PyObject *) self, sep_obj);
1580#endif
1581 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1582 return NULL;
1583
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001584 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001585 (PyObject*) self,
1586 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587 sep_obj, sep, sep_len
1588 );
1589}
1590
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001591Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001592rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001593{
Andrew Dalke525eab32006-05-26 14:00:45 +00001594 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001595 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001596 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597
1598 if (list == NULL)
1599 return NULL;
1600
Andrew Dalke02758d62006-05-26 15:21:01 +00001601 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001602
Andrew Dalke02758d62006-05-26 15:21:01 +00001603 while (maxsplit-- > 0) {
1604 RSKIP_SPACE(s, i);
1605 if (i<0) break;
1606 j = i; i--;
1607 RSKIP_NONSPACE(s, i);
1608 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001609 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001610 if (i >= 0) {
1611 /* Only occurs when maxsplit was reached */
1612 /* Skip any remaining whitespace and copy to beginning of string */
1613 RSKIP_SPACE(s, i);
1614 if (i >= 0)
1615 SPLIT_ADD(s, 0, i + 1);
1616
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001618 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001619 if (PyList_Reverse(list) < 0)
1620 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001622 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001623 Py_DECREF(list);
1624 return NULL;
1625}
1626
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001627Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001628rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629{
Andrew Dalke525eab32006-05-26 14:00:45 +00001630 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001632 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633
1634 if (list == NULL)
1635 return NULL;
1636
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001637 i = j = len - 1;
1638 while ((i >= 0) && (maxcount-- > 0)) {
1639 for (; i >= 0; i--) {
1640 if (s[i] == ch) {
1641 SPLIT_ADD(s, i + 1, j + 1);
1642 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001644 }
1645 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001646 }
1647 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001648 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001649 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001650 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001651 if (PyList_Reverse(list) < 0)
1652 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653 return list;
1654
1655 onError:
1656 Py_DECREF(list);
1657 return NULL;
1658}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659
1660PyDoc_STRVAR(rsplit__doc__,
1661"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1662\n\
1663Return a list of the words in the string S, using sep as the\n\
1664delimiter string, starting at the end of the string and working\n\
1665to the front. If maxsplit is given, at most maxsplit splits are\n\
1666done. If sep is not specified or is None, any whitespace string\n\
1667is a separator.");
1668
1669static PyObject *
1670string_rsplit(PyStringObject *self, PyObject *args)
1671{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001673 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001675 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676
Martin v. Löwis9c830762006-04-13 08:37:17 +00001677 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 return NULL;
1679 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001680 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681 if (subobj == Py_None)
1682 return rsplit_whitespace(s, len, maxsplit);
1683 if (PyString_Check(subobj)) {
1684 sub = PyString_AS_STRING(subobj);
1685 n = PyString_GET_SIZE(subobj);
1686 }
1687#ifdef Py_USING_UNICODE
1688 else if (PyUnicode_Check(subobj))
1689 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1690#endif
1691 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1692 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001693
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 if (n == 0) {
1695 PyErr_SetString(PyExc_ValueError, "empty separator");
1696 return NULL;
1697 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001698 else if (n == 1)
1699 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700
Andrew Dalke525eab32006-05-26 14:00:45 +00001701 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702 if (list == NULL)
1703 return NULL;
1704
1705 j = len;
1706 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001707
1708 while ( (i >= 0) && (maxsplit-- > 0) ) {
1709 for (; i>=0; i--) {
1710 if (Py_STRING_MATCH(s, i, sub, n)) {
1711 SPLIT_ADD(s, i + n, j);
1712 j = i;
1713 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001715 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001716 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001717 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001718 SPLIT_ADD(s, 0, j);
1719 FIX_PREALLOC_SIZE(list);
1720 if (PyList_Reverse(list) < 0)
1721 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 return list;
1723
Andrew Dalke525eab32006-05-26 14:00:45 +00001724onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001725 Py_DECREF(list);
1726 return NULL;
1727}
1728
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001730PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731"S.join(sequence) -> string\n\
1732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001734sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735
1736static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001737string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738{
1739 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001740 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001745 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001746 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 seq = PySequence_Fast(orig, "");
1749 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001750 return NULL;
1751 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001752
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001753 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001754 if (seqlen == 0) {
1755 Py_DECREF(seq);
1756 return PyString_FromString("");
1757 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001759 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001760 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1761 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001763 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001768 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001769 * Do a pre-pass to figure out the total amount of space we'll
1770 * need (sz), see whether any argument is absurd, and defer to
1771 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775 item = PySequence_Fast_GET_ITEM(seq, i);
1776 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001777#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001779 /* Defer to Unicode join.
1780 * CAUTION: There's no gurantee that the
1781 * original sequence can be iterated over
1782 * again, so we must pass seq here.
1783 */
1784 PyObject *result;
1785 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001786 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001787 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001789#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001791 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001792 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001793 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 Py_DECREF(seq);
1795 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001796 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001797 sz += PyString_GET_SIZE(item);
1798 if (i != 0)
1799 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001800 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001802 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001803 Py_DECREF(seq);
1804 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 }
1807
1808 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001809 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 if (res == NULL) {
1811 Py_DECREF(seq);
1812 return NULL;
1813 }
1814
1815 /* Catenate everything. */
1816 p = PyString_AS_STRING(res);
1817 for (i = 0; i < seqlen; ++i) {
1818 size_t n;
1819 item = PySequence_Fast_GET_ITEM(seq, i);
1820 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001821 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 p += n;
1823 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001824 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001825 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001828
Jeremy Hylton49048292000-07-11 03:28:17 +00001829 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831}
1832
Tim Peters52e155e2001-06-16 05:42:57 +00001833PyObject *
1834_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001835{
Tim Petersa7259592001-06-16 05:11:17 +00001836 assert(sep != NULL && PyString_Check(sep));
1837 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001839}
1840
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001841Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001843{
1844 if (*end > len)
1845 *end = len;
1846 else if (*end < 0)
1847 *end += len;
1848 if (*end < 0)
1849 *end = 0;
1850 if (*start < 0)
1851 *start += len;
1852 if (*start < 0)
1853 *start = 0;
1854}
1855
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001856Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001857string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001860 const char *sub;
1861 Py_ssize_t sub_len;
1862 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001864 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1865 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 return -2;
1867 if (PyString_Check(subobj)) {
1868 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001869 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001871#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001872 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001873 return PyUnicode_Find(
1874 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001875#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001876 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001877 /* XXX - the "expected a character buffer object" is pretty
1878 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 return -2;
1880
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001881 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001882 return stringlib_find_slice(
1883 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1884 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001885 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001886 return stringlib_rfind_slice(
1887 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1888 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889}
1890
1891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001892PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893"S.find(sub [,start [,end]]) -> int\n\
1894\n\
1895Return the lowest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001896such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897arguments start and end are interpreted as in slice notation.\n\
1898\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900
1901static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001902string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001904 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905 if (result == -2)
1906 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908}
1909
1910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912"S.index(sub [,start [,end]]) -> int\n\
1913\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001914Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915
1916static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001917string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001919 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920 if (result == -2)
1921 return NULL;
1922 if (result == -1) {
1923 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001924 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 return NULL;
1926 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928}
1929
1930
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001931PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932"S.rfind(sub [,start [,end]]) -> int\n\
1933\n\
1934Return the highest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001935such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936arguments start and end are interpreted as in slice notation.\n\
1937\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001938Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939
1940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001941string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001943 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 if (result == -2)
1945 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947}
1948
1949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951"S.rindex(sub [,start [,end]]) -> int\n\
1952\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001953Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954
1955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001956string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001958 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 if (result == -2)
1960 return NULL;
1961 if (result == -1) {
1962 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001963 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 return NULL;
1965 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967}
1968
1969
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001970Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1972{
1973 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001976 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1977 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001978
1979 i = 0;
1980 if (striptype != RIGHTSTRIP) {
1981 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1982 i++;
1983 }
1984 }
1985
1986 j = len;
1987 if (striptype != LEFTSTRIP) {
1988 do {
1989 j--;
1990 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1991 j++;
1992 }
1993
1994 if (i == 0 && j == len && PyString_CheckExact(self)) {
1995 Py_INCREF(self);
1996 return (PyObject*)self;
1997 }
1998 else
1999 return PyString_FromStringAndSize(s+i, j-i);
2000}
2001
2002
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002003Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002004do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005{
2006 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009 i = 0;
2010 if (striptype != RIGHTSTRIP) {
2011 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2012 i++;
2013 }
2014 }
2015
2016 j = len;
2017 if (striptype != LEFTSTRIP) {
2018 do {
2019 j--;
2020 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2021 j++;
2022 }
2023
Tim Peters8fa5dd02001-09-12 02:18:30 +00002024 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025 Py_INCREF(self);
2026 return (PyObject*)self;
2027 }
2028 else
2029 return PyString_FromStringAndSize(s+i, j-i);
2030}
2031
2032
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002033Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002034do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2035{
2036 PyObject *sep = NULL;
2037
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002038 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002039 return NULL;
2040
2041 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002042 if (PyString_Check(sep))
2043 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002044#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002045 else if (PyUnicode_Check(sep)) {
2046 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2047 PyObject *res;
2048 if (uniself==NULL)
2049 return NULL;
2050 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2051 striptype, sep);
2052 Py_DECREF(uniself);
2053 return res;
2054 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002056 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002058 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002060 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002061#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002062 STRIPNAME(striptype));
2063 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002064 }
2065
2066 return do_strip(self, striptype);
2067}
2068
2069
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002070PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002071"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072\n\
2073Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002074whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002075If chars is given and not None, remove characters in chars instead.\n\
2076If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077
2078static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081 if (PyTuple_GET_SIZE(args) == 0)
2082 return do_strip(self, BOTHSTRIP); /* Common case */
2083 else
2084 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085}
2086
2087
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002088PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002089"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002091Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002092If chars is given and not None, remove characters in chars instead.\n\
2093If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094
2095static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098 if (PyTuple_GET_SIZE(args) == 0)
2099 return do_strip(self, LEFTSTRIP); /* Common case */
2100 else
2101 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102}
2103
2104
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002105PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002106"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002108Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002109If chars is given and not None, remove characters in chars instead.\n\
2110If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111
2112static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115 if (PyTuple_GET_SIZE(args) == 0)
2116 return do_strip(self, RIGHTSTRIP); /* Common case */
2117 else
2118 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119}
2120
2121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002122PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123"S.lower() -> string\n\
2124\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002125Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002127/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2128#ifndef _tolower
2129#define _tolower tolower
2130#endif
2131
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002133string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002135 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002136 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002137 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002139 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002140 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002142
2143 s = PyString_AS_STRING(newobj);
2144
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002145 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002146
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002148 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002149 if (isupper(c))
2150 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002152
Anthony Baxtera6286212006-04-11 07:42:36 +00002153 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154}
2155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002156PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157"S.upper() -> string\n\
2158\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002159Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002161#ifndef _toupper
2162#define _toupper toupper
2163#endif
2164
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002166string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002168 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002169 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002170 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002172 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002175
2176 s = PyString_AS_STRING(newobj);
2177
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002178 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002179
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002181 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002182 if (islower(c))
2183 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002185
Anthony Baxtera6286212006-04-11 07:42:36 +00002186 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187}
2188
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002189PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190"S.title() -> string\n\
2191\n\
2192Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002193characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194
2195static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002196string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197{
2198 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002199 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002201 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202
Anthony Baxtera6286212006-04-11 07:42:36 +00002203 newobj = PyString_FromStringAndSize(NULL, n);
2204 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002206 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207 for (i = 0; i < n; i++) {
2208 int c = Py_CHARMASK(*s++);
2209 if (islower(c)) {
2210 if (!previous_is_cased)
2211 c = toupper(c);
2212 previous_is_cased = 1;
2213 } else if (isupper(c)) {
2214 if (previous_is_cased)
2215 c = tolower(c);
2216 previous_is_cased = 1;
2217 } else
2218 previous_is_cased = 0;
2219 *s_new++ = c;
2220 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002221 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222}
2223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002224PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225"S.capitalize() -> string\n\
2226\n\
2227Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229
2230static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002231string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232{
2233 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002234 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002235 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
Anthony Baxtera6286212006-04-11 07:42:36 +00002237 newobj = PyString_FromStringAndSize(NULL, n);
2238 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002240 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241 if (0 < n) {
2242 int c = Py_CHARMASK(*s++);
2243 if (islower(c))
2244 *s_new = toupper(c);
2245 else
2246 *s_new = c;
2247 s_new++;
2248 }
2249 for (i = 1; i < n; i++) {
2250 int c = Py_CHARMASK(*s++);
2251 if (isupper(c))
2252 *s_new = tolower(c);
2253 else
2254 *s_new = c;
2255 s_new++;
2256 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002257 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258}
2259
2260
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002261PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262"S.count(sub[, start[, end]]) -> int\n\
2263\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002264Return the number of non-overlapping occurrences of substring sub in\n\
2265string S[start:end]. Optional arguments start and end are interpreted\n\
2266as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
2268static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002269string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002271 PyObject *sub_obj;
2272 const char *str = PyString_AS_STRING(self), *sub;
2273 Py_ssize_t sub_len;
2274 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002276 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2277 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002279
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002280 if (PyString_Check(sub_obj)) {
2281 sub = PyString_AS_STRING(sub_obj);
2282 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002284#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002286 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002287 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002288 if (count == -1)
2289 return NULL;
2290 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002291 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002292 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002293#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002294 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 return NULL;
2296
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002297 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002298
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002299 return PyInt_FromSsize_t(
2300 stringlib_count(str + start, end - start, sub, sub_len)
2301 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302}
2303
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002304PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305"S.swapcase() -> string\n\
2306\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002308converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309
2310static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002311string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312{
2313 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002314 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002315 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
Anthony Baxtera6286212006-04-11 07:42:36 +00002317 newobj = PyString_FromStringAndSize(NULL, n);
2318 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002320 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 for (i = 0; i < n; i++) {
2322 int c = Py_CHARMASK(*s++);
2323 if (islower(c)) {
2324 *s_new = toupper(c);
2325 }
2326 else if (isupper(c)) {
2327 *s_new = tolower(c);
2328 }
2329 else
2330 *s_new = c;
2331 s_new++;
2332 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002333 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334}
2335
2336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002337PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338"S.translate(table [,deletechars]) -> string\n\
2339\n\
2340Return a copy of the string S, where all characters occurring\n\
2341in the optional argument deletechars are removed, and the\n\
2342remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002343translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344
2345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002346string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348 register char *input, *output;
2349 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002350 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002353 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354 PyObject *result;
2355 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002358 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361
2362 if (PyString_Check(tableobj)) {
2363 table1 = PyString_AS_STRING(tableobj);
2364 tablen = PyString_GET_SIZE(tableobj);
2365 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002366#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002368 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 parameter; instead a mapping to None will cause characters
2370 to be deleted. */
2371 if (delobj != NULL) {
2372 PyErr_SetString(PyExc_TypeError,
2373 "deletions are implemented differently for unicode");
2374 return NULL;
2375 }
2376 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2377 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002378#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381
Martin v. Löwis00b61272002-12-12 20:03:19 +00002382 if (tablen != 256) {
2383 PyErr_SetString(PyExc_ValueError,
2384 "translation table must be 256 characters long");
2385 return NULL;
2386 }
2387
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 if (delobj != NULL) {
2389 if (PyString_Check(delobj)) {
2390 del_table = PyString_AS_STRING(delobj);
2391 dellen = PyString_GET_SIZE(delobj);
2392 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002393#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002394 else if (PyUnicode_Check(delobj)) {
2395 PyErr_SetString(PyExc_TypeError,
2396 "deletions are implemented differently for unicode");
2397 return NULL;
2398 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002399#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2401 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 }
2403 else {
2404 del_table = NULL;
2405 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406 }
2407
2408 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002409 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 result = PyString_FromStringAndSize((char *)NULL, inlen);
2411 if (result == NULL)
2412 return NULL;
2413 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002414 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415
2416 if (dellen == 0) {
2417 /* If no deletions are required, use faster code */
2418 for (i = inlen; --i >= 0; ) {
2419 c = Py_CHARMASK(*input++);
2420 if (Py_CHARMASK((*output++ = table[c])) != c)
2421 changed = 1;
2422 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002423 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424 return result;
2425 Py_DECREF(result);
2426 Py_INCREF(input_obj);
2427 return input_obj;
2428 }
2429
2430 for (i = 0; i < 256; i++)
2431 trans_table[i] = Py_CHARMASK(table[i]);
2432
2433 for (i = 0; i < dellen; i++)
2434 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2435
2436 for (i = inlen; --i >= 0; ) {
2437 c = Py_CHARMASK(*input++);
2438 if (trans_table[c] != -1)
2439 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2440 continue;
2441 changed = 1;
2442 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002443 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 Py_DECREF(result);
2445 Py_INCREF(input_obj);
2446 return input_obj;
2447 }
2448 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002449 if (inlen > 0)
2450 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002451 return result;
2452}
2453
2454
/* Search directions accepted by findstring() and countstring().
   REVERSE is parenthesised so that it stays a single operand wherever
   the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c' within the first
   `target_len' bytes of `target', or NULL when there is none.  Every
   argument is parenthesised so that expression arguments expand
   safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2462
2463/* String ops must return a string. */
2464/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002465Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002466return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002467{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002468 if (PyString_CheckExact(self)) {
2469 Py_INCREF(self);
2470 return self;
2471 }
2472 return (PyStringObject *)PyString_FromStringAndSize(
2473 PyString_AS_STRING(self),
2474 PyString_GET_SIZE(self));
2475}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002477Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002478countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002479{
2480 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002481 const char *start=target;
2482 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002484 while ( (start=findchar(start, end-start, c)) != NULL ) {
2485 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002486 if (count >= maxcount)
2487 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002488 start += 1;
2489 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002490 return count;
2491}
2492
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002493Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002494findstring(const char *target, Py_ssize_t target_len,
2495 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002496 Py_ssize_t start,
2497 Py_ssize_t end,
2498 int direction)
2499{
2500 if (start < 0) {
2501 start += target_len;
2502 if (start < 0)
2503 start = 0;
2504 }
2505 if (end > target_len) {
2506 end = target_len;
2507 } else if (end < 0) {
2508 end += target_len;
2509 if (end < 0)
2510 end = 0;
2511 }
2512
2513 /* zero-length substrings always match at the first attempt */
2514 if (pattern_len == 0)
2515 return (direction > 0) ? start : end;
2516
2517 end -= pattern_len;
2518
2519 if (direction < 0) {
2520 for (; end >= start; end--)
2521 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2522 return end;
2523 } else {
2524 for (; start <= end; start++)
2525 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2526 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002527 }
2528 return -1;
2529}
2530
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002531Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002532countstring(const char *target, Py_ssize_t target_len,
2533 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002534 Py_ssize_t start,
2535 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002536 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002538 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002540 if (start < 0) {
2541 start += target_len;
2542 if (start < 0)
2543 start = 0;
2544 }
2545 if (end > target_len) {
2546 end = target_len;
2547 } else if (end < 0) {
2548 end += target_len;
2549 if (end < 0)
2550 end = 0;
2551 }
2552
2553 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002554 if (pattern_len == 0 || maxcount == 0) {
2555 if (target_len+1 < maxcount)
2556 return target_len+1;
2557 return maxcount;
2558 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559
2560 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002561 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002562 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2564 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002565 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002566 end -= pattern_len-1;
2567 }
2568 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002569 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002570 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2571 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002572 if (--maxcount <= 0)
2573 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002574 start += pattern_len-1;
2575 }
2576 }
2577 return count;
2578}
2579
2580
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002581/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002582
2583/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002584Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002585replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002586 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002587 Py_ssize_t maxcount)
2588{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002589 char *self_s, *result_s;
2590 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002591 Py_ssize_t count, i, product;
2592 PyStringObject *result;
2593
2594 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002595
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002596 /* 1 at the end plus 1 after every character */
2597 count = self_len+1;
2598 if (maxcount < count)
2599 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002600
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002601 /* Check for overflow */
2602 /* result_len = count * to_len + self_len; */
2603 product = count * to_len;
2604 if (product / to_len != count) {
2605 PyErr_SetString(PyExc_OverflowError,
2606 "replace string is too long");
2607 return NULL;
2608 }
2609 result_len = product + self_len;
2610 if (result_len < 0) {
2611 PyErr_SetString(PyExc_OverflowError,
2612 "replace string is too long");
2613 return NULL;
2614 }
2615
2616 if (! (result = (PyStringObject *)
2617 PyString_FromStringAndSize(NULL, result_len)) )
2618 return NULL;
2619
2620 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002621 result_s = PyString_AS_STRING(result);
2622
2623 /* TODO: special case single character, which doesn't need memcpy */
2624
2625 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002626 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002627 result_s += to_len;
2628 count -= 1;
2629
2630 for (i=0; i<count; i++) {
2631 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002632 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002633 result_s += to_len;
2634 }
2635
2636 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002637 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002638
2639 return result;
2640}
2641
2642/* Special case for deleting a single character */
2643/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002644Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002645replace_delete_single_character(PyStringObject *self,
2646 char from_c, Py_ssize_t maxcount)
2647{
2648 char *self_s, *result_s;
2649 char *start, *next, *end;
2650 Py_ssize_t self_len, result_len;
2651 Py_ssize_t count;
2652 PyStringObject *result;
2653
2654 self_len = PyString_GET_SIZE(self);
2655 self_s = PyString_AS_STRING(self);
2656
Andrew Dalke51324072006-05-26 20:25:22 +00002657 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002658 if (count == 0) {
2659 return return_self(self);
2660 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002661
2662 result_len = self_len - count; /* from_len == 1 */
2663 assert(result_len>=0);
2664
2665 if ( (result = (PyStringObject *)
2666 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2667 return NULL;
2668 result_s = PyString_AS_STRING(result);
2669
2670 start = self_s;
2671 end = self_s + self_len;
2672 while (count-- > 0) {
2673 next = findchar(start, end-start, from_c);
2674 if (next == NULL)
2675 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002676 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002677 result_s += (next-start);
2678 start = next+1;
2679 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002680 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002681
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002682 return result;
2683}
2684
2685/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2686
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002687Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002688replace_delete_substring(PyStringObject *self,
2689 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002690 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002691 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002692 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002693 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002694 Py_ssize_t count, offset;
2695 PyStringObject *result;
2696
2697 self_len = PyString_GET_SIZE(self);
2698 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002699
2700 count = countstring(self_s, self_len,
2701 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002702 0, self_len, 1,
2703 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002704
2705 if (count == 0) {
2706 /* no matches */
2707 return return_self(self);
2708 }
2709
2710 result_len = self_len - (count * from_len);
2711 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002712
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002713 if ( (result = (PyStringObject *)
2714 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2715 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002716
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002717 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002718
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002719 start = self_s;
2720 end = self_s + self_len;
2721 while (count-- > 0) {
2722 offset = findstring(start, end-start,
2723 from_s, from_len,
2724 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002725 if (offset == -1)
2726 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002727 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002728
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002729 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002730
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731 result_s += (next-start);
2732 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002733 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002734 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002735 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002736}
2737
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002738/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002739Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002740replace_single_character_in_place(PyStringObject *self,
2741 char from_c, char to_c,
2742 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002743{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002744 char *self_s, *result_s, *start, *end, *next;
2745 Py_ssize_t self_len;
2746 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002747
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002748 /* The result string will be the same size */
2749 self_s = PyString_AS_STRING(self);
2750 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002751
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002753
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 if (next == NULL) {
2755 /* No matches; return the original string */
2756 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002757 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002758
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002759 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002760 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002761 if (result == NULL)
2762 return NULL;
2763 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002764 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002765
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002766 /* change everything in-place, starting with this one */
2767 start = result_s + (next-self_s);
2768 *start = to_c;
2769 start++;
2770 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002771
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002772 while (--maxcount > 0) {
2773 next = findchar(start, end-start, from_c);
2774 if (next == NULL)
2775 break;
2776 *next = to_c;
2777 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002778 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002779
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002780 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002781}
2782
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002784Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002786 const char *from_s, Py_ssize_t from_len,
2787 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 Py_ssize_t maxcount)
2789{
2790 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002791 char *self_s;
2792 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002794
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002795 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002796
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 self_s = PyString_AS_STRING(self);
2798 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002799
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 offset = findstring(self_s, self_len,
2801 from_s, from_len,
2802 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002803 if (offset == -1) {
2804 /* No matches; return the original string */
2805 return return_self(self);
2806 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002807
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002809 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810 if (result == NULL)
2811 return NULL;
2812 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002813 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002814
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815 /* change everything in-place, starting with this one */
2816 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002817 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002818 start += from_len;
2819 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002820
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002821 while ( --maxcount > 0) {
2822 offset = findstring(start, end-start,
2823 from_s, from_len,
2824 0, end-start, FORWARD);
2825 if (offset==-1)
2826 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002827 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002828 start += offset+from_len;
2829 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002830
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002831 return result;
2832}
2833
2834/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002835Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836replace_single_character(PyStringObject *self,
2837 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002838 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839 Py_ssize_t maxcount)
2840{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002841 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002843 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002844 Py_ssize_t count, product;
2845 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002846
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847 self_s = PyString_AS_STRING(self);
2848 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002849
Andrew Dalke51324072006-05-26 20:25:22 +00002850 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002851 if (count == 0) {
2852 /* no matches, return unchanged */
2853 return return_self(self);
2854 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002855
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002856 /* use the difference between current and new, hence the "-1" */
2857 /* result_len = self_len + count * (to_len-1) */
2858 product = count * (to_len-1);
2859 if (product / (to_len-1) != count) {
2860 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2861 return NULL;
2862 }
2863 result_len = self_len + product;
2864 if (result_len < 0) {
2865 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2866 return NULL;
2867 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002868
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869 if ( (result = (PyStringObject *)
2870 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2871 return NULL;
2872 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002873
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 start = self_s;
2875 end = self_s + self_len;
2876 while (count-- > 0) {
2877 next = findchar(start, end-start, from_c);
2878 if (next == NULL)
2879 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002880
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881 if (next == start) {
2882 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002883 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002884 result_s += to_len;
2885 start += 1;
2886 } else {
2887 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002888 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002889 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002890 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002891 result_s += to_len;
2892 start = next+1;
2893 }
2894 }
2895 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002896 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002897
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 return result;
2899}
2900
2901/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002902Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002904 const char *from_s, Py_ssize_t from_len,
2905 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002907 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002909 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002910 Py_ssize_t count, offset, product;
2911 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002912
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002913 self_s = PyString_AS_STRING(self);
2914 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002915
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916 count = countstring(self_s, self_len,
2917 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002918 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919 if (count == 0) {
2920 /* no matches, return unchanged */
2921 return return_self(self);
2922 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002923
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002924 /* Check for overflow */
2925 /* result_len = self_len + count * (to_len-from_len) */
2926 product = count * (to_len-from_len);
2927 if (product / (to_len-from_len) != count) {
2928 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2929 return NULL;
2930 }
2931 result_len = self_len + product;
2932 if (result_len < 0) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002936
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002937 if ( (result = (PyStringObject *)
2938 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2939 return NULL;
2940 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002941
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002942 start = self_s;
2943 end = self_s + self_len;
2944 while (count-- > 0) {
2945 offset = findstring(start, end-start,
2946 from_s, from_len,
2947 0, end-start, FORWARD);
2948 if (offset == -1)
2949 break;
2950 next = start+offset;
2951 if (next == start) {
2952 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002953 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002954 result_s += to_len;
2955 start += from_len;
2956 } else {
2957 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002958 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002960 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002961 result_s += to_len;
2962 start = next+from_len;
2963 }
2964 }
2965 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002966 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002967
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002968 return result;
2969}
2970
2971
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002972Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002974 const char *from_s, Py_ssize_t from_len,
2975 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002976 Py_ssize_t maxcount)
2977{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978 if (maxcount < 0) {
2979 maxcount = PY_SSIZE_T_MAX;
2980 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2981 /* nothing to do; return the original string */
2982 return return_self(self);
2983 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002984
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002985 if (maxcount == 0 ||
2986 (from_len == 0 && to_len == 0)) {
2987 /* nothing to do; return the original string */
2988 return return_self(self);
2989 }
2990
2991 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002992
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002993 if (from_len == 0) {
2994 /* insert the 'to' string everywhere. */
2995 /* >>> "Python".replace("", ".") */
2996 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002997 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002998 }
2999
3000 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3001 /* point for an empty self string to generate a non-empty string */
3002 /* Special case so the remaining code always gets a non-empty string */
3003 if (PyString_GET_SIZE(self) == 0) {
3004 return return_self(self);
3005 }
3006
3007 if (to_len == 0) {
3008 /* delete all occurances of 'from' string */
3009 if (from_len == 1) {
3010 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003011 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003013 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003014 }
3015 }
3016
3017 /* Handle special case where both strings have the same length */
3018
3019 if (from_len == to_len) {
3020 if (from_len == 1) {
3021 return replace_single_character_in_place(
3022 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003023 from_s[0],
3024 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003025 maxcount);
3026 } else {
3027 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003028 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003029 }
3030 }
3031
3032 /* Otherwise use the more generic algorithms */
3033 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003034 return replace_single_character(self, from_s[0],
3035 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003036 } else {
3037 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003038 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003039 }
3040}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003041
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003042PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003043"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044\n\
3045Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003046old replaced by new. If the optional argument count is\n\
3047given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048
3049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003050string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003051{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003052 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003054 const char *from_s, *to_s;
3055 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003057 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003058 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003060 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003061 from_s = PyString_AS_STRING(from);
3062 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003064#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003065 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003066 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003067 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003068#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003069 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070 return NULL;
3071
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003072 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003073 to_s = PyString_AS_STRING(to);
3074 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003076#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003078 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003079 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003080#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003081 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 return NULL;
3083
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003085 from_s, from_len,
3086 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087}
3088
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003089/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003091/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003092 * against substr, using the start and end arguments. Returns
3093 * -1 on error, 0 if not found and 1 if found.
3094 */
3095Py_LOCAL(int)
3096_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3097 Py_ssize_t end, int direction)
3098{
3099 Py_ssize_t len = PyString_GET_SIZE(self);
3100 Py_ssize_t slen;
3101 const char* sub;
3102 const char* str;
3103
3104 if (PyString_Check(substr)) {
3105 sub = PyString_AS_STRING(substr);
3106 slen = PyString_GET_SIZE(substr);
3107 }
3108#ifdef Py_USING_UNICODE
3109 else if (PyUnicode_Check(substr))
3110 return PyUnicode_Tailmatch((PyObject *)self,
3111 substr, start, end, direction);
3112#endif
3113 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3114 return -1;
3115 str = PyString_AS_STRING(self);
3116
3117 string_adjust_indices(&start, &end, len);
3118
3119 if (direction < 0) {
3120 /* startswith */
3121 if (start+slen > len)
3122 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003123 } else {
3124 /* endswith */
3125 if (end-start < slen || start > len)
3126 return 0;
3127
3128 if (end-slen > start)
3129 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003130 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003131 if (end-start >= slen)
3132 return ! memcmp(str+start, sub, slen);
3133 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003134}
3135
3136
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003137PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003138"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003140Return True if S starts with the specified prefix, False otherwise.\n\
3141With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003142With optional end, stop comparing S at that position.\n\
3143prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144
3145static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003146string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003148 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003149 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003151 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152
Guido van Rossumc6821402000-05-08 14:08:05 +00003153 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3154 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003156 if (PyTuple_Check(subobj)) {
3157 Py_ssize_t i;
3158 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3159 result = _string_tailmatch(self,
3160 PyTuple_GET_ITEM(subobj, i),
3161 start, end, -1);
3162 if (result == -1)
3163 return NULL;
3164 else if (result) {
3165 Py_RETURN_TRUE;
3166 }
3167 }
3168 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 }
Georg Brandl24250812006-06-09 18:45:48 +00003170 result = _string_tailmatch(self, subobj, start, end, -1);
3171 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003172 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003173 else
Georg Brandl24250812006-06-09 18:45:48 +00003174 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003175}
3176
3177
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003178PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003179"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003180\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003181Return True if S ends with the specified suffix, False otherwise.\n\
3182With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003183With optional end, stop comparing S at that position.\n\
3184suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003185
3186static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003187string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003188{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003189 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003190 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003192 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003193
Guido van Rossumc6821402000-05-08 14:08:05 +00003194 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3195 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003197 if (PyTuple_Check(subobj)) {
3198 Py_ssize_t i;
3199 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3200 result = _string_tailmatch(self,
3201 PyTuple_GET_ITEM(subobj, i),
3202 start, end, +1);
3203 if (result == -1)
3204 return NULL;
3205 else if (result) {
3206 Py_RETURN_TRUE;
3207 }
3208 }
3209 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 }
Georg Brandl24250812006-06-09 18:45:48 +00003211 result = _string_tailmatch(self, subobj, start, end, +1);
3212 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003214 else
Georg Brandl24250812006-06-09 18:45:48 +00003215 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216}
3217
3218
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003219PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003220"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003221\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003222Encodes S using the codec registered for encoding. encoding defaults\n\
3223to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003224handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003225a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3226'xmlcharrefreplace' as well as any other name registered with\n\
3227codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003228
3229static PyObject *
3230string_encode(PyStringObject *self, PyObject *args)
3231{
3232 char *encoding = NULL;
3233 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003234 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003235
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003236 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3237 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003238 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003239 if (v == NULL)
3240 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003241 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3242 PyErr_Format(PyExc_TypeError,
3243 "encoder did not return a string/unicode object "
3244 "(type=%.400s)",
3245 v->ob_type->tp_name);
3246 Py_DECREF(v);
3247 return NULL;
3248 }
3249 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003250
3251 onError:
3252 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003253}
3254
3255
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003256PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003257"S.decode([encoding[,errors]]) -> object\n\
3258\n\
3259Decodes S using the codec registered for encoding. encoding defaults\n\
3260to the default encoding. errors may be given to set a different error\n\
3261handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003262a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3263as well as any other name registerd with codecs.register_error that is\n\
3264able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003265
3266static PyObject *
3267string_decode(PyStringObject *self, PyObject *args)
3268{
3269 char *encoding = NULL;
3270 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003271 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003272
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003273 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3274 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003275 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003276 if (v == NULL)
3277 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003278 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3279 PyErr_Format(PyExc_TypeError,
3280 "decoder did not return a string/unicode object "
3281 "(type=%.400s)",
3282 v->ob_type->tp_name);
3283 Py_DECREF(v);
3284 return NULL;
3285 }
3286 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003287
3288 onError:
3289 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003290}
3291
3292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003293PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003294"S.expandtabs([tabsize]) -> string\n\
3295\n\
3296Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003297If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003298
3299static PyObject*
3300string_expandtabs(PyStringObject *self, PyObject *args)
3301{
3302 const char *e, *p;
3303 char *q;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003304 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003305 PyObject *u;
3306 int tabsize = 8;
3307
3308 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3309 return NULL;
3310
Thomas Wouters7e474022000-07-16 12:04:32 +00003311 /* First pass: determine size of output string */
Neal Norwitz66e64e22007-06-09 04:06:30 +00003312 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003313 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3314 for (p = PyString_AS_STRING(self); p < e; p++)
3315 if (*p == '\t') {
Neal Norwitz66e64e22007-06-09 04:06:30 +00003316 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003317 j += tabsize - (j % tabsize);
Neal Norwitz66e64e22007-06-09 04:06:30 +00003318 if (old_j > j) {
Neal Norwitz8355dd52007-06-11 04:32:41 +00003319 PyErr_SetString(PyExc_OverflowError,
3320 "new string is too long");
Neal Norwitz66e64e22007-06-09 04:06:30 +00003321 return NULL;
3322 }
3323 old_j = j;
3324 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003325 }
3326 else {
3327 j++;
3328 if (*p == '\n' || *p == '\r') {
3329 i += j;
Neal Norwitz8355dd52007-06-11 04:32:41 +00003330 old_j = j = 0;
3331 if (i < 0) {
3332 PyErr_SetString(PyExc_OverflowError,
3333 "new string is too long");
3334 return NULL;
3335 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003336 }
3337 }
3338
Neal Norwitz66e64e22007-06-09 04:06:30 +00003339 if ((i + j) < 0) {
3340 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3341 return NULL;
3342 }
3343
Guido van Rossum4c08d552000-03-10 22:55:18 +00003344 /* Second pass: create output string and fill it */
3345 u = PyString_FromStringAndSize(NULL, i + j);
3346 if (!u)
3347 return NULL;
3348
3349 j = 0;
3350 q = PyString_AS_STRING(u);
3351
3352 for (p = PyString_AS_STRING(self); p < e; p++)
3353 if (*p == '\t') {
3354 if (tabsize > 0) {
3355 i = tabsize - (j % tabsize);
3356 j += i;
3357 while (i--)
3358 *q++ = ' ';
3359 }
3360 }
3361 else {
3362 j++;
3363 *q++ = *p;
3364 if (*p == '\n' || *p == '\r')
3365 j = 0;
3366 }
3367
3368 return u;
3369}
3370
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003371Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003372pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003373{
3374 PyObject *u;
3375
3376 if (left < 0)
3377 left = 0;
3378 if (right < 0)
3379 right = 0;
3380
Tim Peters8fa5dd02001-09-12 02:18:30 +00003381 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003382 Py_INCREF(self);
3383 return (PyObject *)self;
3384 }
3385
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003386 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 left + PyString_GET_SIZE(self) + right);
3388 if (u) {
3389 if (left)
3390 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003391 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003392 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003393 PyString_GET_SIZE(self));
3394 if (right)
3395 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3396 fill, right);
3397 }
3398
3399 return u;
3400}
3401
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003402PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003403"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003404"\n"
3405"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003406"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003407
3408static PyObject *
3409string_ljust(PyStringObject *self, PyObject *args)
3410{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003411 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003412 char fillchar = ' ';
3413
Thomas Wouters4abb3662006-04-19 14:50:15 +00003414 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003415 return NULL;
3416
Tim Peters8fa5dd02001-09-12 02:18:30 +00003417 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003418 Py_INCREF(self);
3419 return (PyObject*) self;
3420 }
3421
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003422 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003423}
3424
3425
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003426PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003427"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003428"\n"
3429"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003430"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003431
3432static PyObject *
3433string_rjust(PyStringObject *self, PyObject *args)
3434{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003435 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003436 char fillchar = ' ';
3437
Thomas Wouters4abb3662006-04-19 14:50:15 +00003438 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003439 return NULL;
3440
Tim Peters8fa5dd02001-09-12 02:18:30 +00003441 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003442 Py_INCREF(self);
3443 return (PyObject*) self;
3444 }
3445
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003446 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003447}
3448
3449
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003450PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003451"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003452"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003453"Return S centered in a string of length width. Padding is\n"
3454"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003455
3456static PyObject *
3457string_center(PyStringObject *self, PyObject *args)
3458{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003459 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003460 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003461 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003462
Thomas Wouters4abb3662006-04-19 14:50:15 +00003463 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003464 return NULL;
3465
Tim Peters8fa5dd02001-09-12 02:18:30 +00003466 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003467 Py_INCREF(self);
3468 return (PyObject*) self;
3469 }
3470
3471 marg = width - PyString_GET_SIZE(self);
3472 left = marg / 2 + (marg & width & 1);
3473
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003474 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003475}
3476
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003477PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003478"S.zfill(width) -> string\n"
3479"\n"
3480"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003481"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003482
3483static PyObject *
3484string_zfill(PyStringObject *self, PyObject *args)
3485{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003486 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003487 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003488 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003489 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003490
Thomas Wouters4abb3662006-04-19 14:50:15 +00003491 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003492 return NULL;
3493
3494 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003495 if (PyString_CheckExact(self)) {
3496 Py_INCREF(self);
3497 return (PyObject*) self;
3498 }
3499 else
3500 return PyString_FromStringAndSize(
3501 PyString_AS_STRING(self),
3502 PyString_GET_SIZE(self)
3503 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003504 }
3505
3506 fill = width - PyString_GET_SIZE(self);
3507
3508 s = pad(self, fill, 0, '0');
3509
3510 if (s == NULL)
3511 return NULL;
3512
3513 p = PyString_AS_STRING(s);
3514 if (p[fill] == '+' || p[fill] == '-') {
3515 /* move sign to beginning of string */
3516 p[0] = p[fill];
3517 p[fill] = '0';
3518 }
3519
3520 return (PyObject*) s;
3521}
3522
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003523PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003524"S.isspace() -> bool\n\
3525\n\
3526Return True if all characters in S are whitespace\n\
3527and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003528
3529static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003530string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003531{
Fred Drakeba096332000-07-09 07:04:36 +00003532 register const unsigned char *p
3533 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003534 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003535
Guido van Rossum4c08d552000-03-10 22:55:18 +00003536 /* Shortcut for single character strings */
3537 if (PyString_GET_SIZE(self) == 1 &&
3538 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003539 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003540
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003541 /* Special case for empty strings */
3542 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003543 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003544
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545 e = p + PyString_GET_SIZE(self);
3546 for (; p < e; p++) {
3547 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003548 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003549 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003551}
3552
3553
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003554PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003555"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003556\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003557Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003558and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003559
3560static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003561string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003562{
Fred Drakeba096332000-07-09 07:04:36 +00003563 register const unsigned char *p
3564 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565 register const unsigned char *e;
3566
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003567 /* Shortcut for single character strings */
3568 if (PyString_GET_SIZE(self) == 1 &&
3569 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003570 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571
3572 /* Special case for empty strings */
3573 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003574 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003575
3576 e = p + PyString_GET_SIZE(self);
3577 for (; p < e; p++) {
3578 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003579 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003580 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582}
3583
3584
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003585PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003588Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003589and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003590
3591static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003592string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593{
Fred Drakeba096332000-07-09 07:04:36 +00003594 register const unsigned char *p
3595 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003596 register const unsigned char *e;
3597
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003598 /* Shortcut for single character strings */
3599 if (PyString_GET_SIZE(self) == 1 &&
3600 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003601 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003602
3603 /* Special case for empty strings */
3604 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003605 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003606
3607 e = p + PyString_GET_SIZE(self);
3608 for (; p < e; p++) {
3609 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003610 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003611 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003613}
3614
3615
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003616PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003619Return True if all characters in S are digits\n\
3620and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621
3622static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003623string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624{
Fred Drakeba096332000-07-09 07:04:36 +00003625 register const unsigned char *p
3626 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003627 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003628
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629 /* Shortcut for single character strings */
3630 if (PyString_GET_SIZE(self) == 1 &&
3631 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003632 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003634 /* Special case for empty strings */
3635 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003636 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003637
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638 e = p + PyString_GET_SIZE(self);
3639 for (; p < e; p++) {
3640 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003643 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003644}
3645
3646
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003647PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003651at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003652
3653static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003654string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655{
Fred Drakeba096332000-07-09 07:04:36 +00003656 register const unsigned char *p
3657 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003658 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659 int cased;
3660
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661 /* Shortcut for single character strings */
3662 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003663 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003665 /* Special case for empty strings */
3666 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003667 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003668
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669 e = p + PyString_GET_SIZE(self);
3670 cased = 0;
3671 for (; p < e; p++) {
3672 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003673 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003674 else if (!cased && islower(*p))
3675 cased = 1;
3676 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678}
3679
3680
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003681PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003682"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003684Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003685at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686
3687static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003688string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689{
Fred Drakeba096332000-07-09 07:04:36 +00003690 register const unsigned char *p
3691 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003692 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693 int cased;
3694
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695 /* Shortcut for single character strings */
3696 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003697 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003699 /* Special case for empty strings */
3700 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003702
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703 e = p + PyString_GET_SIZE(self);
3704 cased = 0;
3705 for (; p < e; p++) {
3706 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003707 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708 else if (!cased && isupper(*p))
3709 cased = 1;
3710 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003711 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712}
3713
3714
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003715PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003716"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003718Return True if S is a titlecased string and there is at least one\n\
3719character in S, i.e. uppercase characters may only follow uncased\n\
3720characters and lowercase characters only cased ones. Return False\n\
3721otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722
3723static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003724string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725{
Fred Drakeba096332000-07-09 07:04:36 +00003726 register const unsigned char *p
3727 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003728 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 int cased, previous_is_cased;
3730
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 /* Shortcut for single character strings */
3732 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003733 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003735 /* Special case for empty strings */
3736 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003737 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003738
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739 e = p + PyString_GET_SIZE(self);
3740 cased = 0;
3741 previous_is_cased = 0;
3742 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003743 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003744
3745 if (isupper(ch)) {
3746 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003747 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 previous_is_cased = 1;
3749 cased = 1;
3750 }
3751 else if (islower(ch)) {
3752 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003753 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754 previous_is_cased = 1;
3755 cased = 1;
3756 }
3757 else
3758 previous_is_cased = 0;
3759 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003760 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761}
3762
3763
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003764PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003765"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766\n\
3767Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003768Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003769is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003770
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771static PyObject*
3772string_splitlines(PyStringObject *self, PyObject *args)
3773{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003774 register Py_ssize_t i;
3775 register Py_ssize_t j;
3776 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003777 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778 PyObject *list;
3779 PyObject *str;
3780 char *data;
3781
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003782 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783 return NULL;
3784
3785 data = PyString_AS_STRING(self);
3786 len = PyString_GET_SIZE(self);
3787
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003788 /* This does not use the preallocated list because splitlines is
3789 usually run with hundreds of newlines. The overhead of
3790 switching between PyList_SET_ITEM and append causes about a
3791 2-3% slowdown for that common case. A smarter implementation
3792 could move the if check out, so the SET_ITEMs are done first
3793 and the appends only done when the prealloc buffer is full.
3794 That's too much work for little gain.*/
3795
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796 list = PyList_New(0);
3797 if (!list)
3798 goto onError;
3799
3800 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003801 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003802
Guido van Rossum4c08d552000-03-10 22:55:18 +00003803 /* Find a line and append it */
3804 while (i < len && data[i] != '\n' && data[i] != '\r')
3805 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003806
3807 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003808 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809 if (i < len) {
3810 if (data[i] == '\r' && i + 1 < len &&
3811 data[i+1] == '\n')
3812 i += 2;
3813 else
3814 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003815 if (keepends)
3816 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003818 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003819 j = i;
3820 }
3821 if (j < len) {
3822 SPLIT_APPEND(data, j, len);
3823 }
3824
3825 return list;
3826
3827 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003828 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003829 return NULL;
3830}
3831
3832#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003833#undef SPLIT_ADD
3834#undef MAX_PREALLOC
3835#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003837static PyObject *
3838string_getnewargs(PyStringObject *v)
3839{
3840 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3841}
3842
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003843
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003844static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003845string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003846 /* Counterparts of the obsolete stropmodule functions; except
3847 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003848 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3849 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003850 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003851 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3852 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003853 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3854 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3855 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3856 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3857 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3858 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3859 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003860 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3861 capitalize__doc__},
3862 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3863 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3864 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003865 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003866 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3867 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3868 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3869 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3870 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3871 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3872 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003873 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3874 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003875 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3876 startswith__doc__},
3877 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3878 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3879 swapcase__doc__},
3880 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3881 translate__doc__},
3882 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3883 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3884 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3885 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3886 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3887 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3888 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3889 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3890 expandtabs__doc__},
3891 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3892 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003893 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003894 {NULL, NULL} /* sentinel */
3895};
3896
Jeremy Hylton938ace62002-07-17 16:30:39 +00003897static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003898str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3899
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003900static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003901string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003902{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003903 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003904 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003905
Guido van Rossumae960af2001-08-30 03:11:59 +00003906 if (type != &PyString_Type)
3907 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003908 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3909 return NULL;
3910 if (x == NULL)
3911 return PyString_FromString("");
3912 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003913}
3914
Guido van Rossumae960af2001-08-30 03:11:59 +00003915static PyObject *
3916str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3917{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003918 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003919 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003920
3921 assert(PyType_IsSubtype(type, &PyString_Type));
3922 tmp = string_new(&PyString_Type, args, kwds);
3923 if (tmp == NULL)
3924 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003925 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003926 n = PyString_GET_SIZE(tmp);
3927 pnew = type->tp_alloc(type, n);
3928 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003929 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003930 ((PyStringObject *)pnew)->ob_shash =
3931 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003932 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003933 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003934 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003935 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003936}
3937
Guido van Rossumcacfc072002-05-24 19:01:59 +00003938static PyObject *
3939basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3940{
3941 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003942 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003943 return NULL;
3944}
3945
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003946static PyObject *
3947string_mod(PyObject *v, PyObject *w)
3948{
3949 if (!PyString_Check(v)) {
3950 Py_INCREF(Py_NotImplemented);
3951 return Py_NotImplemented;
3952 }
3953 return PyString_Format(v, w);
3954}
3955
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003956PyDoc_STRVAR(basestring_doc,
3957"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003958
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003959static PyNumberMethods string_as_number = {
3960 0, /*nb_add*/
3961 0, /*nb_subtract*/
3962 0, /*nb_multiply*/
3963 0, /*nb_divide*/
3964 string_mod, /*nb_remainder*/
3965};
3966
3967
Guido van Rossumcacfc072002-05-24 19:01:59 +00003968PyTypeObject PyBaseString_Type = {
3969 PyObject_HEAD_INIT(&PyType_Type)
3970 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003971 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003972 0,
3973 0,
3974 0, /* tp_dealloc */
3975 0, /* tp_print */
3976 0, /* tp_getattr */
3977 0, /* tp_setattr */
3978 0, /* tp_compare */
3979 0, /* tp_repr */
3980 0, /* tp_as_number */
3981 0, /* tp_as_sequence */
3982 0, /* tp_as_mapping */
3983 0, /* tp_hash */
3984 0, /* tp_call */
3985 0, /* tp_str */
3986 0, /* tp_getattro */
3987 0, /* tp_setattro */
3988 0, /* tp_as_buffer */
3989 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3990 basestring_doc, /* tp_doc */
3991 0, /* tp_traverse */
3992 0, /* tp_clear */
3993 0, /* tp_richcompare */
3994 0, /* tp_weaklistoffset */
3995 0, /* tp_iter */
3996 0, /* tp_iternext */
3997 0, /* tp_methods */
3998 0, /* tp_members */
3999 0, /* tp_getset */
4000 &PyBaseObject_Type, /* tp_base */
4001 0, /* tp_dict */
4002 0, /* tp_descr_get */
4003 0, /* tp_descr_set */
4004 0, /* tp_dictoffset */
4005 0, /* tp_init */
4006 0, /* tp_alloc */
4007 basestring_new, /* tp_new */
4008 0, /* tp_free */
4009};
4010
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004011PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004012"str(object) -> string\n\
4013\n\
4014Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004015If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004016
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004017PyTypeObject PyString_Type = {
4018 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004019 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004020 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004021 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004022 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004023 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004024 (printfunc)string_print, /* tp_print */
4025 0, /* tp_getattr */
4026 0, /* tp_setattr */
4027 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004028 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004029 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004030 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004031 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004032 (hashfunc)string_hash, /* tp_hash */
4033 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004034 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004035 PyObject_GenericGetAttr, /* tp_getattro */
4036 0, /* tp_setattro */
4037 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004038 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004039 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004040 string_doc, /* tp_doc */
4041 0, /* tp_traverse */
4042 0, /* tp_clear */
4043 (richcmpfunc)string_richcompare, /* tp_richcompare */
4044 0, /* tp_weaklistoffset */
4045 0, /* tp_iter */
4046 0, /* tp_iternext */
4047 string_methods, /* tp_methods */
4048 0, /* tp_members */
4049 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004050 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004051 0, /* tp_dict */
4052 0, /* tp_descr_get */
4053 0, /* tp_descr_set */
4054 0, /* tp_dictoffset */
4055 0, /* tp_init */
4056 0, /* tp_alloc */
4057 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004058 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004059};
4060
4061void
Fred Drakeba096332000-07-09 07:04:36 +00004062PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004063{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004064 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004065 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004066 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004067 if (w == NULL || !PyString_Check(*pv)) {
4068 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004069 *pv = NULL;
4070 return;
4071 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004072 v = string_concat((PyStringObject *) *pv, w);
4073 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004074 *pv = v;
4075}
4076
Guido van Rossum013142a1994-08-30 08:19:36 +00004077void
Fred Drakeba096332000-07-09 07:04:36 +00004078PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004079{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004080 PyString_Concat(pv, w);
4081 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004082}
4083
4084
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004085/* The following function breaks the notion that strings are immutable:
4086 it changes the size of a string. We get away with this only if there
4087 is only one module referencing the object. You can also think of it
4088 as creating a new string object and destroying the old one, only
4089 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004090 already be known to some other part of the code...
4091 Note that if there's not enough memory to resize the string, the original
4092 string object at *pv is deallocated, *pv is set to NULL, an "out of
4093 memory" exception is set, and -1 is returned. Else (on success) 0 is
4094 returned, and the value in *pv may or may not be the same as on input.
4095 As always, an extra byte is allocated for a trailing \0 byte (newsize
4096 does *not* include that), and a trailing \0 byte is stored.
4097*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004098
4099int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004100_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004101{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004102 register PyObject *v;
4103 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004104 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004105 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4106 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004107 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004108 Py_DECREF(v);
4109 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004110 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004111 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004112 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004113 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004114 _Py_ForgetReference(v);
4115 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004116 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004117 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004118 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004119 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004120 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004121 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004122 _Py_NewReference(*pv);
4123 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004124 sv->ob_size = newsize;
4125 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004126 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004127 return 0;
4128}
Guido van Rossume5372401993-03-16 12:15:04 +00004129
4130/* Helpers for formatstring */
4131
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004132Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004133getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004134{
Thomas Wouters977485d2006-02-16 15:59:12 +00004135 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004136 if (argidx < arglen) {
4137 (*p_argidx)++;
4138 if (arglen < 0)
4139 return args;
4140 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004141 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004142 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004143 PyErr_SetString(PyExc_TypeError,
4144 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004145 return NULL;
4146}
4147
/* Format flags: one bit per flag character that may follow the '%' of a
 * conversion specification.  The bits are OR-ed together into an int.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
4160
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004161Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004162formatfloat(char *buf, size_t buflen, int flags,
4163 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004164{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004165 /* fmt = '%#.' + `prec` + `type`
4166 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004167 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004168 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004169 x = PyFloat_AsDouble(v);
4170 if (x == -1.0 && PyErr_Occurred()) {
4171 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004172 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004173 }
Guido van Rossume5372401993-03-16 12:15:04 +00004174 if (prec < 0)
4175 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004176 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4177 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004178 /* Worst case length calc to ensure no buffer overrun:
4179
4180 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004181 fmt = %#.<prec>g
4182 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004183 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004184 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004185
4186 'f' formats:
4187 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4188 len = 1 + 50 + 1 + prec = 52 + prec
4189
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004190 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004191 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004192
4193 */
Georg Brandlc5db9232007-07-12 08:38:04 +00004194 if (((type == 'g' || type == 'G') &&
4195 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004196 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004197 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004198 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004199 return -1;
4200 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004201 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4202 (flags&F_ALT) ? "#" : "",
4203 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004204 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004205 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004206}
4207
Tim Peters38fd5b62000-09-21 05:43:11 +00004208/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4209 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4210 * Python's regular ints.
4211 * Return value: a new PyString*, or NULL if error.
4212 * . *pbuf is set to point into it,
4213 * *plen set to the # of chars following that.
4214 * Caller must decref it when done using pbuf.
4215 * The string starting at *pbuf is of the form
4216 * "-"? ("0x" | "0X")? digit+
4217 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004218 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004219 * There will be at least prec digits, zero-filled on the left if
4220 * necessary to get that many.
4221 * val object to be converted
4222 * flags bitmask of format flags; only F_ALT is looked at
4223 * prec minimum number of digits; 0-fill on left if needed
4224 * type a character in [duoxX]; u acts the same as d
4225 *
4226 * CAUTION: o, x and X conversions on regular ints can never
4227 * produce a '-' sign, but can for Python's unbounded ints.
4228 */
4229PyObject*
4230_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4231 char **pbuf, int *plen)
4232{
4233 PyObject *result = NULL;
4234 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004235 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004236 int sign; /* 1 if '-', else 0 */
4237 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004238 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004239 int numdigits; /* len == numnondigits + numdigits */
4240 int numnondigits = 0;
4241
4242 switch (type) {
4243 case 'd':
4244 case 'u':
4245 result = val->ob_type->tp_str(val);
4246 break;
4247 case 'o':
4248 result = val->ob_type->tp_as_number->nb_oct(val);
4249 break;
4250 case 'x':
4251 case 'X':
4252 numnondigits = 2;
4253 result = val->ob_type->tp_as_number->nb_hex(val);
4254 break;
4255 default:
4256 assert(!"'type' not in [duoxX]");
4257 }
4258 if (!result)
4259 return NULL;
4260
Neal Norwitz56423e52006-08-13 18:11:08 +00004261 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004262 if (!buf) {
4263 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004264 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004265 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004266
Tim Peters38fd5b62000-09-21 05:43:11 +00004267 /* To modify the string in-place, there can only be one reference. */
4268 if (result->ob_refcnt != 1) {
4269 PyErr_BadInternalCall();
4270 return NULL;
4271 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004272 llen = PyString_Size(result);
Armin Rigo4b63c212006-10-04 11:44:06 +00004273 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004274 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4275 return NULL;
4276 }
4277 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004278 if (buf[len-1] == 'L') {
4279 --len;
4280 buf[len] = '\0';
4281 }
4282 sign = buf[0] == '-';
4283 numnondigits += sign;
4284 numdigits = len - numnondigits;
4285 assert(numdigits > 0);
4286
Tim Petersfff53252001-04-12 18:38:48 +00004287 /* Get rid of base marker unless F_ALT */
4288 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004289 /* Need to skip 0x, 0X or 0. */
4290 int skipped = 0;
4291 switch (type) {
4292 case 'o':
4293 assert(buf[sign] == '0');
4294 /* If 0 is only digit, leave it alone. */
4295 if (numdigits > 1) {
4296 skipped = 1;
4297 --numdigits;
4298 }
4299 break;
4300 case 'x':
4301 case 'X':
4302 assert(buf[sign] == '0');
4303 assert(buf[sign + 1] == 'x');
4304 skipped = 2;
4305 numnondigits -= 2;
4306 break;
4307 }
4308 if (skipped) {
4309 buf += skipped;
4310 len -= skipped;
4311 if (sign)
4312 buf[0] = '-';
4313 }
4314 assert(len == numnondigits + numdigits);
4315 assert(numdigits > 0);
4316 }
4317
4318 /* Fill with leading zeroes to meet minimum width. */
4319 if (prec > numdigits) {
4320 PyObject *r1 = PyString_FromStringAndSize(NULL,
4321 numnondigits + prec);
4322 char *b1;
4323 if (!r1) {
4324 Py_DECREF(result);
4325 return NULL;
4326 }
4327 b1 = PyString_AS_STRING(r1);
4328 for (i = 0; i < numnondigits; ++i)
4329 *b1++ = *buf++;
4330 for (i = 0; i < prec - numdigits; i++)
4331 *b1++ = '0';
4332 for (i = 0; i < numdigits; i++)
4333 *b1++ = *buf++;
4334 *b1 = '\0';
4335 Py_DECREF(result);
4336 result = r1;
4337 buf = PyString_AS_STRING(result);
4338 len = numnondigits + prec;
4339 }
4340
4341 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004342 if (type == 'X') {
4343 /* Need to convert all lower case letters to upper case.
4344 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004345 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004346 if (buf[i] >= 'a' && buf[i] <= 'x')
4347 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004348 }
4349 *pbuf = buf;
4350 *plen = len;
4351 return result;
4352}
4353
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004354Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004355formatint(char *buf, size_t buflen, int flags,
4356 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004357{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004358 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004359 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4360 + 1 + 1 = 24 */
4361 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004362 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004363 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004364
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004365 x = PyInt_AsLong(v);
4366 if (x == -1 && PyErr_Occurred()) {
4367 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004368 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004369 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004370 if (x < 0 && type == 'u') {
4371 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004372 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004373 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4374 sign = "-";
4375 else
4376 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004377 if (prec < 0)
4378 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004379
4380 if ((flags & F_ALT) &&
4381 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004382 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004383 * of issues that cause pain:
4384 * - when 0 is being converted, the C standard leaves off
4385 * the '0x' or '0X', which is inconsistent with other
4386 * %#x/%#X conversions and inconsistent with Python's
4387 * hex() function
4388 * - there are platforms that violate the standard and
4389 * convert 0 with the '0x' or '0X'
4390 * (Metrowerks, Compaq Tru64)
4391 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004392 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004393 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004394 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004395 * We can achieve the desired consistency by inserting our
4396 * own '0x' or '0X' prefix, and substituting %x/%X in place
4397 * of %#x/%#X.
4398 *
4399 * Note that this is the same approach as used in
4400 * formatint() in unicodeobject.c
4401 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004402 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4403 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004404 }
4405 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004406 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4407 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004408 prec, type);
4409 }
4410
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004411 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4412 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004413 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004414 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004415 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004416 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004417 return -1;
4418 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004419 if (sign[0])
4420 PyOS_snprintf(buf, buflen, fmt, -x);
4421 else
4422 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004423 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004424}
4425
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004426Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004427formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004428{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004429 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004430 if (PyString_Check(v)) {
4431 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004432 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004433 }
4434 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004435 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004436 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004437 }
4438 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004439 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004440}
4441
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (for example after `sizeof').
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004451
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004452PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004453PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004454{
4455 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004456 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004457 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004458 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004459 PyObject *result, *orig_args;
4460#ifdef Py_USING_UNICODE
4461 PyObject *v, *w;
4462#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004463 PyObject *dict = NULL;
4464 if (format == NULL || !PyString_Check(format) || args == NULL) {
4465 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004466 return NULL;
4467 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004468 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004469 fmt = PyString_AS_STRING(format);
4470 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004471 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004472 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004473 if (result == NULL)
4474 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004475 res = PyString_AsString(result);
4476 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004477 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004478 argidx = 0;
4479 }
4480 else {
4481 arglen = -1;
4482 argidx = -2;
4483 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004484 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4485 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004486 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004487 while (--fmtcnt >= 0) {
4488 if (*fmt != '%') {
4489 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004490 rescnt = fmtcnt + 100;
4491 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004492 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004493 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004494 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004495 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004496 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004497 }
4498 *res++ = *fmt++;
4499 }
4500 else {
4501 /* Got a format specifier */
4502 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004503 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004504 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004505 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004506 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004507 PyObject *v = NULL;
4508 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004509 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004510 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004511 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004512 char formatbuf[FORMATBUFLEN];
4513 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004514#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004515 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004516 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004517#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004518
Guido van Rossumda9c2711996-12-05 21:58:58 +00004519 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004520 if (*fmt == '(') {
4521 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004522 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004523 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004524 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004525
4526 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004527 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004528 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004529 goto error;
4530 }
4531 ++fmt;
4532 --fmtcnt;
4533 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004534 /* Skip over balanced parentheses */
4535 while (pcount > 0 && --fmtcnt >= 0) {
4536 if (*fmt == ')')
4537 --pcount;
4538 else if (*fmt == '(')
4539 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004540 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004541 }
4542 keylen = fmt - keystart - 1;
4543 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004544 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004545 "incomplete format key");
4546 goto error;
4547 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004548 key = PyString_FromStringAndSize(keystart,
4549 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004550 if (key == NULL)
4551 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004552 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004553 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004554 args_owned = 0;
4555 }
4556 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004557 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004558 if (args == NULL) {
4559 goto error;
4560 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004561 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004562 arglen = -1;
4563 argidx = -2;
4564 }
Guido van Rossume5372401993-03-16 12:15:04 +00004565 while (--fmtcnt >= 0) {
4566 switch (c = *fmt++) {
4567 case '-': flags |= F_LJUST; continue;
4568 case '+': flags |= F_SIGN; continue;
4569 case ' ': flags |= F_BLANK; continue;
4570 case '#': flags |= F_ALT; continue;
4571 case '0': flags |= F_ZERO; continue;
4572 }
4573 break;
4574 }
4575 if (c == '*') {
4576 v = getnextarg(args, arglen, &argidx);
4577 if (v == NULL)
4578 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004579 if (!PyInt_Check(v)) {
4580 PyErr_SetString(PyExc_TypeError,
4581 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004582 goto error;
4583 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004584 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004585 if (width < 0) {
4586 flags |= F_LJUST;
4587 width = -width;
4588 }
Guido van Rossume5372401993-03-16 12:15:04 +00004589 if (--fmtcnt >= 0)
4590 c = *fmt++;
4591 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004592 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004593 width = c - '0';
4594 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004595 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004596 if (!isdigit(c))
4597 break;
4598 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004599 PyErr_SetString(
4600 PyExc_ValueError,
4601 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004602 goto error;
4603 }
4604 width = width*10 + (c - '0');
4605 }
4606 }
4607 if (c == '.') {
4608 prec = 0;
4609 if (--fmtcnt >= 0)
4610 c = *fmt++;
4611 if (c == '*') {
4612 v = getnextarg(args, arglen, &argidx);
4613 if (v == NULL)
4614 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004615 if (!PyInt_Check(v)) {
4616 PyErr_SetString(
4617 PyExc_TypeError,
4618 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004619 goto error;
4620 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004621 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004622 if (prec < 0)
4623 prec = 0;
4624 if (--fmtcnt >= 0)
4625 c = *fmt++;
4626 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004627 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004628 prec = c - '0';
4629 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004630 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004631 if (!isdigit(c))
4632 break;
4633 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004634 PyErr_SetString(
4635 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004636 "prec too big");
4637 goto error;
4638 }
4639 prec = prec*10 + (c - '0');
4640 }
4641 }
4642 } /* prec */
4643 if (fmtcnt >= 0) {
4644 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004645 if (--fmtcnt >= 0)
4646 c = *fmt++;
4647 }
4648 }
4649 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004650 PyErr_SetString(PyExc_ValueError,
4651 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004652 goto error;
4653 }
4654 if (c != '%') {
4655 v = getnextarg(args, arglen, &argidx);
4656 if (v == NULL)
4657 goto error;
4658 }
4659 sign = 0;
4660 fill = ' ';
4661 switch (c) {
4662 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004663 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004664 len = 1;
4665 break;
4666 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004667#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004668 if (PyUnicode_Check(v)) {
4669 fmt = fmt_start;
4670 argidx = argidx_start;
4671 goto unicode;
4672 }
Georg Brandld45014b2005-10-01 17:06:00 +00004673#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004674 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004675#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004676 if (temp != NULL && PyUnicode_Check(temp)) {
4677 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004678 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004679 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004680 goto unicode;
4681 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004682#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004683 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004684 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004685 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004686 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004687 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004688 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004689 if (!PyString_Check(temp)) {
4690 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004691 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004692 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004693 goto error;
4694 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004695 pbuf = PyString_AS_STRING(temp);
4696 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004697 if (prec >= 0 && len > prec)
4698 len = prec;
4699 break;
4700 case 'i':
4701 case 'd':
4702 case 'u':
4703 case 'o':
4704 case 'x':
4705 case 'X':
4706 if (c == 'i')
4707 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004708 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004709 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004710 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004711 prec, c, &pbuf, &ilen);
4712 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004713 if (!temp)
4714 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004715 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004716 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004717 else {
4718 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004719 len = formatint(pbuf,
4720 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004721 flags, prec, c, v);
4722 if (len < 0)
4723 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004724 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004725 }
4726 if (flags & F_ZERO)
4727 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004728 break;
4729 case 'e':
4730 case 'E':
4731 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004732 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004733 case 'g':
4734 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004735 if (c == 'F')
4736 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004737 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004738 len = formatfloat(pbuf, sizeof(formatbuf),
4739 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004740 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004741 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004742 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004743 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004744 fill = '0';
4745 break;
4746 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004747#ifdef Py_USING_UNICODE
4748 if (PyUnicode_Check(v)) {
4749 fmt = fmt_start;
4750 argidx = argidx_start;
4751 goto unicode;
4752 }
4753#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004754 pbuf = formatbuf;
4755 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004756 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004757 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004758 break;
4759 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004760 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004761 "unsupported format character '%c' (0x%x) "
Armin Rigo4b63c212006-10-04 11:44:06 +00004762 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004763 c, c,
Armin Rigo4b63c212006-10-04 11:44:06 +00004764 (Py_ssize_t)(fmt - 1 -
4765 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004766 goto error;
4767 }
4768 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004769 if (*pbuf == '-' || *pbuf == '+') {
4770 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004771 len--;
4772 }
4773 else if (flags & F_SIGN)
4774 sign = '+';
4775 else if (flags & F_BLANK)
4776 sign = ' ';
4777 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004778 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004779 }
4780 if (width < len)
4781 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004782 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004783 reslen -= rescnt;
4784 rescnt = width + fmtcnt + 100;
4785 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004786 if (reslen < 0) {
4787 Py_DECREF(result);
Georg Brandl5f795862007-02-26 13:51:34 +00004788 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004789 return PyErr_NoMemory();
4790 }
Georg Brandl5f795862007-02-26 13:51:34 +00004791 if (_PyString_Resize(&result, reslen) < 0) {
4792 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004793 return NULL;
Georg Brandl5f795862007-02-26 13:51:34 +00004794 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004795 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004796 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004797 }
4798 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004799 if (fill != ' ')
4800 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004801 rescnt--;
4802 if (width > len)
4803 width--;
4804 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004805 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4806 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004807 assert(pbuf[1] == c);
4808 if (fill != ' ') {
4809 *res++ = *pbuf++;
4810 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004811 }
Tim Petersfff53252001-04-12 18:38:48 +00004812 rescnt -= 2;
4813 width -= 2;
4814 if (width < 0)
4815 width = 0;
4816 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004817 }
4818 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004819 do {
4820 --rescnt;
4821 *res++ = fill;
4822 } while (--width > len);
4823 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004824 if (fill == ' ') {
4825 if (sign)
4826 *res++ = sign;
4827 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004828 (c == 'x' || c == 'X')) {
4829 assert(pbuf[0] == '0');
4830 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004831 *res++ = *pbuf++;
4832 *res++ = *pbuf++;
4833 }
4834 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004835 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004836 res += len;
4837 rescnt -= len;
4838 while (--width >= len) {
4839 --rescnt;
4840 *res++ = ' ';
4841 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004842 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004843 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004844 "not all arguments converted during string formatting");
Georg Brandl5f795862007-02-26 13:51:34 +00004845 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004846 goto error;
4847 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004848 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004849 } /* '%' */
4850 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004851 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004852 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004853 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004854 goto error;
4855 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004856 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004857 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004858 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004859 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004860 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004861
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004862#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004863 unicode:
4864 if (args_owned) {
4865 Py_DECREF(args);
4866 args_owned = 0;
4867 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004868 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004869 if (PyTuple_Check(orig_args) && argidx > 0) {
4870 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004871 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004872 v = PyTuple_New(n);
4873 if (v == NULL)
4874 goto error;
4875 while (--n >= 0) {
4876 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4877 Py_INCREF(w);
4878 PyTuple_SET_ITEM(v, n, w);
4879 }
4880 args = v;
4881 } else {
4882 Py_INCREF(orig_args);
4883 args = orig_args;
4884 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004885 args_owned = 1;
4886 /* Take what we have of the result and let the Unicode formatting
4887 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004888 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004889 if (_PyString_Resize(&result, rescnt))
4890 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004891 fmtcnt = PyString_GET_SIZE(format) - \
4892 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004893 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4894 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004895 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004896 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004897 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004898 if (v == NULL)
4899 goto error;
4900 /* Paste what we have (result) to what the Unicode formatting
4901 function returned (v) and return the result (or error) */
4902 w = PyUnicode_Concat(result, v);
4903 Py_DECREF(result);
4904 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004905 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004906 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004907#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004908
Guido van Rossume5372401993-03-16 12:15:04 +00004909 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004910 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004911 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004912 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004913 }
Guido van Rossume5372401993-03-16 12:15:04 +00004914 return NULL;
4915}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004916
Guido van Rossum2a61e741997-01-18 07:55:05 +00004917void
Fred Drakeba096332000-07-09 07:04:36 +00004918PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004919{
4920 register PyStringObject *s = (PyStringObject *)(*p);
4921 PyObject *t;
4922 if (s == NULL || !PyString_Check(s))
4923 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004924 /* If it's a string subclass, we don't really know what putting
4925 it in the interned dict might do. */
4926 if (!PyString_CheckExact(s))
4927 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004928 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004929 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004930 if (interned == NULL) {
4931 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004932 if (interned == NULL) {
4933 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004934 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004935 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004936 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004937 t = PyDict_GetItem(interned, (PyObject *)s);
4938 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004939 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004940 Py_DECREF(*p);
4941 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004942 return;
4943 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004944
Armin Rigo79f7ad22004-08-07 19:27:39 +00004945 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004946 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004947 return;
4948 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004949 /* The two references in interned are not counted by refcnt.
4950 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004951 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004952 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004953}
4954
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004955void
4956PyString_InternImmortal(PyObject **p)
4957{
4958 PyString_InternInPlace(p);
4959 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4960 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4961 Py_INCREF(*p);
4962 }
4963}
4964
Guido van Rossum2a61e741997-01-18 07:55:05 +00004965
4966PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004967PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004968{
4969 PyObject *s = PyString_FromString(cp);
4970 if (s == NULL)
4971 return NULL;
4972 PyString_InternInPlace(&s);
4973 return s;
4974}
4975
Guido van Rossum8cf04761997-08-02 02:57:45 +00004976void
Fred Drakeba096332000-07-09 07:04:36 +00004977PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004978{
4979 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004980 for (i = 0; i < UCHAR_MAX + 1; i++) {
4981 Py_XDECREF(characters[i]);
4982 characters[i] = NULL;
4983 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004984 Py_XDECREF(nullstring);
4985 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004986}
Barry Warsawa903ad982001-02-23 16:40:48 +00004987
Barry Warsawa903ad982001-02-23 16:40:48 +00004988void _Py_ReleaseInternedStrings(void)
4989{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004990 PyObject *keys;
4991 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004992 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004993
4994 if (interned == NULL || !PyDict_Check(interned))
4995 return;
4996 keys = PyDict_Keys(interned);
4997 if (keys == NULL || !PyList_Check(keys)) {
4998 PyErr_Clear();
4999 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005000 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005001
5002 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5003 detector, interned strings are not forcibly deallocated; rather, we
5004 give them their stolen references back, and then clear and DECREF
5005 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005006
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005007 fprintf(stderr, "releasing interned strings\n");
5008 n = PyList_GET_SIZE(keys);
5009 for (i = 0; i < n; i++) {
5010 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5011 switch (s->ob_sstate) {
5012 case SSTATE_NOT_INTERNED:
5013 /* XXX Shouldn't happen */
5014 break;
5015 case SSTATE_INTERNED_IMMORTAL:
5016 s->ob_refcnt += 1;
5017 break;
5018 case SSTATE_INTERNED_MORTAL:
5019 s->ob_refcnt += 2;
5020 break;
5021 default:
5022 Py_FatalError("Inconsistent interned string state.");
5023 }
5024 s->ob_sstate = SSTATE_NOT_INTERNED;
5025 }
5026 Py_DECREF(keys);
5027 PyDict_Clear(interned);
5028 Py_DECREF(interned);
5029 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005030}