blob: 3f1e482857b2a7c073975eafb9a97f5c3b2d1ca3 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossum45ec02a2002-08-19 21:43:18 +000015/* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
19
Tim Petersae1d0c92006-03-17 03:29:34 +000020 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000021 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
22*/
23static PyObject *interned;
24
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
   For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
82 memcpy(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000136 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
164 memcpy(count, vargs, sizeof(va_list));
165#else
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
306 memcpy(s, p, i);
307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 memcpy(p, r, rn);
586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Martin v. Löwis18e16552006-02-15 17:27:45 +0000692static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693string_getsize(register PyObject *op)
694{
695 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 if (PyString_AsStringAndSize(op, &s, &len))
698 return -1;
699 return len;
700}
701
702static /*const*/ char *
703string_getbuffer(register PyObject *op)
704{
705 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000706 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (PyString_AsStringAndSize(op, &s, &len))
708 return NULL;
709 return s;
710}
711
Martin v. Löwis18e16552006-02-15 17:27:45 +0000712Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000713PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000714{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715 if (!PyString_Check(op))
716 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000717 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718}
719
720/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000721PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723 if (!PyString_Check(op))
724 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726}
727
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728int
729PyString_AsStringAndSize(register PyObject *obj,
730 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000731 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732{
733 if (s == NULL) {
734 PyErr_BadInternalCall();
735 return -1;
736 }
737
738 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000739#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740 if (PyUnicode_Check(obj)) {
741 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
742 if (obj == NULL)
743 return -1;
744 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000745 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000746#endif
747 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000748 PyErr_Format(PyExc_TypeError,
749 "expected string or Unicode object, "
750 "%.200s found", obj->ob_type->tp_name);
751 return -1;
752 }
753 }
754
755 *s = PyString_AS_STRING(obj);
756 if (len != NULL)
757 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000758 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000759 PyErr_SetString(PyExc_TypeError,
760 "expected string without null bytes");
761 return -1;
762 }
763 return 0;
764}
765
Fredrik Lundhaf722372006-05-25 17:55:31 +0000766/* -------------------------------------------------------------------- */
767/* Helpers */
768
769#define USE_FAST /* experimental fast search implementation */
770
771/* XXX - this code is copied from unicodeobject.c. we really should
772 refactor the core implementations (see _sre.c for how this can be
773 done), but that'll have to wait -- fredrik */
774
775/* fast search/count implementation, based on a mix between boyer-
776 moore and horspool, with a few more bells and whistles on the top.
777 for some more background, see: http://effbot.org/stringlib */
778
779/* note: fastsearch may access s[n], which isn't a problem when using
780 Python's ordinary string types, but may cause problems if you're
781 using this code in other contexts. also, the count mode returns -1
Andrew M. Kuchlingf344c942006-05-25 18:11:16 +0000782 if there cannot possibly be a match in the target string, and 0 if
Fredrik Lundhaf722372006-05-25 17:55:31 +0000783 it has actually checked for matches, but didn't find any. callers
784 beware! */
785
786#define FAST_COUNT 0
787#define FAST_SEARCH 1
788
Fredrik Lundh95e2a912006-05-26 11:38:15 +0000789Py_LOCAL(Py_ssize_t)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +0000790fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode)
Fredrik Lundhaf722372006-05-25 17:55:31 +0000791{
792 long mask;
Fredrik Lundhf2c0dfd2006-05-26 10:27:17 +0000793 Py_ssize_t skip, count = 0;
Fredrik Lundhaf722372006-05-25 17:55:31 +0000794 Py_ssize_t i, j, mlast, w;
795
796 w = n - m;
797
798 if (w < 0)
799 return -1;
800
801 /* look for special cases */
802 if (m <= 1) {
803 if (m <= 0)
804 return -1;
805 /* use special case for 1-character strings */
806 if (mode == FAST_COUNT) {
807 for (i = 0; i < n; i++)
808 if (s[i] == p[0])
809 count++;
810 return count;
811 } else {
812 for (i = 0; i < n; i++)
813 if (s[i] == p[0])
814 return i;
815 }
816 return -1;
817 }
818
819 mlast = m - 1;
820
821 /* create compressed boyer-moore delta 1 table */
822 skip = mlast - 1;
823 /* process pattern[:-1] */
824 for (mask = i = 0; i < mlast; i++) {
825 mask |= (1 << (p[i] & 0x1F));
826 if (p[i] == p[mlast])
827 skip = mlast - i - 1;
828 }
829 /* process pattern[-1] outside the loop */
830 mask |= (1 << (p[mlast] & 0x1F));
831
832 for (i = 0; i <= w; i++) {
833 /* note: using mlast in the skip path slows things down on x86 */
834 if (s[i+m-1] == p[m-1]) {
835 /* candidate match */
836 for (j = 0; j < mlast; j++)
837 if (s[i+j] != p[j])
838 break;
839 if (j == mlast) {
840 /* got a match! */
841 if (mode != FAST_COUNT)
842 return i;
843 count++;
844 i = i + mlast;
845 continue;
846 }
847 /* miss: check if next character is part of pattern */
848 if (!(mask & (1 << (s[i+m] & 0x1F))))
849 i = i + m;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +0000850 else
Fredrik Lundhaf722372006-05-25 17:55:31 +0000851 i = i + skip;
Fredrik Lundhaf722372006-05-25 17:55:31 +0000852 } else {
853 /* skip: check if next character is part of pattern */
854 if (!(mask & (1 << (s[i+m] & 0x1F))))
855 i = i + m;
856 }
857 }
858
859 if (mode != FAST_COUNT)
860 return -1;
861 return count;
862}
863
864/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865/* Methods */
866
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000867static int
Fred Drakeba096332000-07-09 07:04:36 +0000868string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000870 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000872 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000873
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000874 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000875 if (! PyString_CheckExact(op)) {
876 int ret;
877 /* A str subclass may have its own __str__ method. */
878 op = (PyStringObject *) PyObject_Str((PyObject *)op);
879 if (op == NULL)
880 return -1;
881 ret = string_print(op, fp, flags);
882 Py_DECREF(op);
883 return ret;
884 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000885 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000886#ifdef __VMS
887 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
888#else
889 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
890#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000891 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000893
Thomas Wouters7e474022000-07-16 12:04:32 +0000894 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000895 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000896 if (memchr(op->ob_sval, '\'', op->ob_size) &&
897 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000898 quote = '"';
899
900 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 for (i = 0; i < op->ob_size; i++) {
902 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000903 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000904 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000905 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000906 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000907 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000908 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000909 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000913 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000914 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000916 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000917 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000918}
919
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000920PyObject *
921PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000923 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000924 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000925 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000926 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000927 PyErr_SetString(PyExc_OverflowError,
928 "string is too large to make repr");
929 }
930 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000931 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000932 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933 }
934 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 register char c;
937 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000938 int quote;
939
Thomas Wouters7e474022000-07-16 12:04:32 +0000940 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000941 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000942 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000943 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000944 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000945 quote = '"';
946
Tim Peters9161c8b2001-12-03 01:55:38 +0000947 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000948 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000950 /* There's at least enough room for a hex escape
951 and a closing quote. */
952 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000953 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000954 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000955 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000956 else if (c == '\t')
957 *p++ = '\\', *p++ = 't';
958 else if (c == '\n')
959 *p++ = '\\', *p++ = 'n';
960 else if (c == '\r')
961 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000962 else if (c < ' ' || c >= 0x7f) {
963 /* For performance, we don't want to call
964 PyOS_snprintf here (extra layers of
965 function call). */
966 sprintf(p, "\\x%02x", c & 0xff);
967 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000968 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000969 else
970 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000972 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000973 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000976 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000977 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979}
980
Guido van Rossum189f1df2001-05-01 16:51:53 +0000981static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000982string_repr(PyObject *op)
983{
984 return PyString_Repr(op, 1);
985}
986
987static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000988string_str(PyObject *s)
989{
Tim Petersc9933152001-10-16 20:18:24 +0000990 assert(PyString_Check(s));
991 if (PyString_CheckExact(s)) {
992 Py_INCREF(s);
993 return s;
994 }
995 else {
996 /* Subtype -- return genuine string with the same value. */
997 PyStringObject *t = (PyStringObject *) s;
998 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
999 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001000}
1001
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001003string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001004{
1005 return a->ob_size;
1006}
1007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001009string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010{
Andrew Dalke598710c2006-05-25 18:18:39 +00001011 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001012 register PyStringObject *op;
1013 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001014#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001015 if (PyUnicode_Check(bb))
1016 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001017#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001018 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +00001019 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +00001020 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021 return NULL;
1022 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001024 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +00001025 if ((a->ob_size == 0 || b->ob_size == 0) &&
1026 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1027 if (a->ob_size == 0) {
1028 Py_INCREF(bb);
1029 return bb;
1030 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001031 Py_INCREF(a);
1032 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001033 }
1034 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +00001035 if (size < 0) {
1036 PyErr_SetString(PyExc_OverflowError,
1037 "strings are too large to concat");
1038 return NULL;
1039 }
1040
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001041 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001042 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001043 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001044 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001045 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001046 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001047 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001048 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1049 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001050 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001051 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001052#undef b
1053}
1054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001056string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001057{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001058 register Py_ssize_t i;
1059 register Py_ssize_t j;
1060 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001061 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001062 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063 if (n < 0)
1064 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001065 /* watch out for overflows: the size can overflow int,
1066 * and the # of bytes needed can overflow size_t
1067 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001068 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001069 if (n && size / n != a->ob_size) {
1070 PyErr_SetString(PyExc_OverflowError,
1071 "repeated string is too long");
1072 return NULL;
1073 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001074 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001075 Py_INCREF(a);
1076 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001077 }
Tim Peterse7c05322004-06-27 17:24:49 +00001078 nbytes = (size_t)size;
1079 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001080 PyErr_SetString(PyExc_OverflowError,
1081 "repeated string is too long");
1082 return NULL;
1083 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001084 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001085 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001086 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001087 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001088 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001089 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001090 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001091 op->ob_sval[size] = '\0';
1092 if (a->ob_size == 1 && n > 0) {
1093 memset(op->ob_sval, a->ob_sval[0] , n);
1094 return (PyObject *) op;
1095 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001096 i = 0;
1097 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001098 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1099 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001100 }
1101 while (i < size) {
1102 j = (i <= size-i) ? i : size-i;
1103 memcpy(op->ob_sval+i, op->ob_sval, j);
1104 i += j;
1105 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001106 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001107}
1108
1109/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1110
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001111static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001112string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001113 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001114 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115{
1116 if (i < 0)
1117 i = 0;
1118 if (j < 0)
1119 j = 0; /* Avoid signed/unsigned bug in next line */
1120 if (j > a->ob_size)
1121 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001122 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1123 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001124 Py_INCREF(a);
1125 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001126 }
1127 if (j < i)
1128 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001129 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001130}
1131
Guido van Rossum9284a572000-03-07 15:53:43 +00001132static int
Fred Drakeba096332000-07-09 07:04:36 +00001133string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001134{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001135 char *s = PyString_AS_STRING(a);
1136 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001137 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001138#ifdef USE_FAST
1139 Py_ssize_t pos;
1140#else
1141 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001142 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001143 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001144#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001145
1146 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001147#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001148 if (PyUnicode_Check(el))
1149 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001150#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001151 if (!PyString_Check(el)) {
1152 PyErr_SetString(PyExc_TypeError,
1153 "'in <string>' requires string as left operand");
1154 return -1;
1155 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001156 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001157
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001158 if (len_sub == 0)
1159 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001160
1161#ifdef USE_FAST
1162 pos = fastsearch(
1163 s, PyString_GET_SIZE(a),
1164 sub, len_sub, FAST_SEARCH
1165 );
1166 return (pos != -1);
1167#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001168 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001169 substring. When s<last, there is still room for a possible match
1170 and s[0] through s[len_sub-1] will be in bounds.
1171 shortsub is len_sub minus the last character which is checked
1172 separately just before the memcmp(). That check helps prevent
1173 false starts and saves the setup time for memcmp().
1174 */
1175 firstchar = sub[0];
1176 shortsub = len_sub - 1;
1177 lastchar = sub[shortsub];
1178 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1179 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001180 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001181 if (s == NULL)
1182 return 0;
1183 assert(s < last);
1184 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001185 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001186 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001187 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001188#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001189 return 0;
1190}
1191
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001192static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001193string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001194{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001195 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001196 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001197 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001198 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001199 return NULL;
1200 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001201 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001202 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001203 if (v == NULL)
1204 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001205 else {
1206#ifdef COUNT_ALLOCS
1207 one_strings++;
1208#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001209 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001210 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001211 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001212}
1213
Martin v. Löwiscd353062001-05-24 16:56:35 +00001214static PyObject*
1215string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001216{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001217 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001218 Py_ssize_t len_a, len_b;
1219 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001220 PyObject *result;
1221
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001222 /* Make sure both arguments are strings. */
1223 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001224 result = Py_NotImplemented;
1225 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001226 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001227 if (a == b) {
1228 switch (op) {
1229 case Py_EQ:case Py_LE:case Py_GE:
1230 result = Py_True;
1231 goto out;
1232 case Py_NE:case Py_LT:case Py_GT:
1233 result = Py_False;
1234 goto out;
1235 }
1236 }
1237 if (op == Py_EQ) {
1238 /* Supporting Py_NE here as well does not save
1239 much time, since Py_NE is rarely used. */
1240 if (a->ob_size == b->ob_size
1241 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001242 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001243 a->ob_size) == 0)) {
1244 result = Py_True;
1245 } else {
1246 result = Py_False;
1247 }
1248 goto out;
1249 }
1250 len_a = a->ob_size; len_b = b->ob_size;
1251 min_len = (len_a < len_b) ? len_a : len_b;
1252 if (min_len > 0) {
1253 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1254 if (c==0)
1255 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1256 }else
1257 c = 0;
1258 if (c == 0)
1259 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1260 switch (op) {
1261 case Py_LT: c = c < 0; break;
1262 case Py_LE: c = c <= 0; break;
1263 case Py_EQ: assert(0); break; /* unreachable */
1264 case Py_NE: c = c != 0; break;
1265 case Py_GT: c = c > 0; break;
1266 case Py_GE: c = c >= 0; break;
1267 default:
1268 result = Py_NotImplemented;
1269 goto out;
1270 }
1271 result = c ? Py_True : Py_False;
1272 out:
1273 Py_INCREF(result);
1274 return result;
1275}
1276
1277int
1278_PyString_Eq(PyObject *o1, PyObject *o2)
1279{
1280 PyStringObject *a, *b;
1281 a = (PyStringObject*)o1;
1282 b = (PyStringObject*)o2;
1283 return a->ob_size == b->ob_size
1284 && *a->ob_sval == *b->ob_sval
1285 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001286}
1287
Guido van Rossum9bfef441993-03-29 10:43:31 +00001288static long
Fred Drakeba096332000-07-09 07:04:36 +00001289string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001290{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001291 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001292 register unsigned char *p;
1293 register long x;
1294
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001295 if (a->ob_shash != -1)
1296 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001297 len = a->ob_size;
1298 p = (unsigned char *) a->ob_sval;
1299 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001300 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001301 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001302 x ^= a->ob_size;
1303 if (x == -1)
1304 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001305 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001306 return x;
1307}
1308
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001309#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1310
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001311static PyObject*
1312string_subscript(PyStringObject* self, PyObject* item)
1313{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001314 PyNumberMethods *nb = item->ob_type->tp_as_number;
1315 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1316 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001317 if (i == -1 && PyErr_Occurred())
1318 return NULL;
1319 if (i < 0)
1320 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001321 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001322 }
1323 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001325 char* source_buf;
1326 char* result_buf;
1327 PyObject* result;
1328
Tim Petersae1d0c92006-03-17 03:29:34 +00001329 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001330 PyString_GET_SIZE(self),
1331 &start, &stop, &step, &slicelength) < 0) {
1332 return NULL;
1333 }
1334
1335 if (slicelength <= 0) {
1336 return PyString_FromStringAndSize("", 0);
1337 }
1338 else {
1339 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001340 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001341 if (result_buf == NULL)
1342 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001343
Tim Petersae1d0c92006-03-17 03:29:34 +00001344 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001345 cur += step, i++) {
1346 result_buf[i] = source_buf[cur];
1347 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001348
1349 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001350 slicelength);
1351 PyMem_Free(result_buf);
1352 return result;
1353 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001354 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001355 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001356 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001357 "string indices must be integers");
1358 return NULL;
1359 }
1360}
1361
Martin v. Löwis18e16552006-02-15 17:27:45 +00001362static Py_ssize_t
1363string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001364{
1365 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001366 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001367 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001368 return -1;
1369 }
1370 *ptr = (void *)self->ob_sval;
1371 return self->ob_size;
1372}
1373
Martin v. Löwis18e16552006-02-15 17:27:45 +00001374static Py_ssize_t
1375string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001376{
Guido van Rossum045e6881997-09-08 18:30:11 +00001377 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001378 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001379 return -1;
1380}
1381
Martin v. Löwis18e16552006-02-15 17:27:45 +00001382static Py_ssize_t
1383string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001384{
1385 if ( lenp )
1386 *lenp = self->ob_size;
1387 return 1;
1388}
1389
Martin v. Löwis18e16552006-02-15 17:27:45 +00001390static Py_ssize_t
1391string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001392{
1393 if ( index != 0 ) {
1394 PyErr_SetString(PyExc_SystemError,
1395 "accessing non-existent string segment");
1396 return -1;
1397 }
1398 *ptr = self->ob_sval;
1399 return self->ob_size;
1400}
1401
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001402static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001403 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001404 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001405 (ssizeargfunc)string_repeat, /*sq_repeat*/
1406 (ssizeargfunc)string_item, /*sq_item*/
1407 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001408 0, /*sq_ass_item*/
1409 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001410 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001411};
1412
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001413static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001414 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001415 (binaryfunc)string_subscript,
1416 0,
1417};
1418
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001419static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001420 (readbufferproc)string_buffer_getreadbuf,
1421 (writebufferproc)string_buffer_getwritebuf,
1422 (segcountproc)string_buffer_getsegcount,
1423 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001424};
1425
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426
1427
/* Strip-mode selectors; they double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* Method name for a strip mode: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001436
Andrew Dalke525eab32006-05-26 14:00:45 +00001437
1438/* Overallocate the initial list to reduce the number of reallocs for small
1439 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1440 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1441 text (roughly 11 words per line) and field delimited data (usually 1-10
1442 fields). For large strings the split algorithms are bandwidth limited
1443 so increasing the preallocation likely will not improve things.*/
1444
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* Initial list size for a given split limit: one slot more than the
   limit, capped at MAX_PREALLOC.  5 splits gives 6 elements.  The
   argument is parenthesized so that an expression can be passed
   safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1450
/* Append the substring data[left:right] to `list'.

   Relies on the enclosing function providing a PyObject *str scratch
   variable, a PyObject *list, and an onError label that is jumped to
   when creating the string or appending it fails.

   The body is wrapped in a block so that the macro behaves as a single
   statement, for example under an unbraced `if'. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }
1462
/* Add the substring data[left:right] as element number `count' of
   `list', then increment `count'.

   While count is below MAX_PREALLOC the new string is stored directly
   into the list slot; after that it is appended.  Relies on the
   enclosing function providing str, list, count and an onError label,
   which is jumped to on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001479
/* Always force the list to the expected size.  Uses the enclosing
   function's `count'; the trailing semicolon is part of the macro. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;

/* Cursor helpers for whitespace splitting.  Each one moves index `i'
   over buffer `s' in place: the SKIP_ forms advance while i < len, the
   RSKIP_ forms retreat while i >= 0.  Arguments are parenthesized so
   that expressions can be passed safely; `i' must be an lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1487
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001489split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490{
Andrew Dalke525eab32006-05-26 14:00:45 +00001491 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001493 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494
1495 if (list == NULL)
1496 return NULL;
1497
Andrew Dalke02758d62006-05-26 15:21:01 +00001498 i = j = 0;
1499
1500 while (maxsplit-- > 0) {
1501 SKIP_SPACE(s, i, len);
1502 if (i==len) break;
1503 j = i; i++;
1504 SKIP_NONSPACE(s, i, len);
1505 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001507
1508 if (i < len) {
1509 /* Only occurs when maxsplit was reached */
1510 /* Skip any remaining whitespace and copy to end of string */
1511 SKIP_SPACE(s, i, len);
1512 if (i != len)
1513 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001514 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001515 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001517 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518 Py_DECREF(list);
1519 return NULL;
1520}
1521
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001522static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001523split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001524{
Andrew Dalke525eab32006-05-26 14:00:45 +00001525 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001526 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001527 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001528
1529 if (list == NULL)
1530 return NULL;
1531
1532 for (i = j = 0; i < len; ) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001533 /* TODO: Use findchar/memchr for this? */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001534 if (s[i] == ch) {
1535 if (maxcount-- <= 0)
1536 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001537 SPLIT_ADD(s, j, i);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001538 i = j = i + 1;
1539 } else
1540 i++;
1541 }
1542 if (j <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001543 SPLIT_ADD(s, j, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001544 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001545 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001546 return list;
1547
1548 onError:
1549 Py_DECREF(list);
1550 return NULL;
1551}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001553PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001554"S.split([sep [,maxsplit]]) -> list of strings\n\
1555\n\
1556Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001558splits are done. If sep is not specified or is None, any\n\
1559whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001560
1561static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001562string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001563{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001564 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001565 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001566 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001567 PyObject *list, *str, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568
Martin v. Löwis9c830762006-04-13 08:37:17 +00001569 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001572 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001573 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 if (PyString_Check(subobj)) {
1576 sub = PyString_AS_STRING(subobj);
1577 n = PyString_GET_SIZE(subobj);
1578 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001579#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001580 else if (PyUnicode_Check(subobj))
1581 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001582#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001583 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1584 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001585
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586 if (n == 0) {
1587 PyErr_SetString(PyExc_ValueError, "empty separator");
1588 return NULL;
1589 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001590 else if (n == 1)
1591 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592
Andrew Dalke525eab32006-05-26 14:00:45 +00001593 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594 if (list == NULL)
1595 return NULL;
1596
1597 i = j = 0;
1598 while (i+n <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001599 /* TODO: Use Py_STRING_MATCH */
Fred Drake396f6e02000-06-20 15:47:54 +00001600 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001601 if (maxsplit-- <= 0)
1602 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001603 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605 }
1606 else
1607 i++;
1608 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001609 SPLIT_ADD(s, j, len);
1610 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611 return list;
1612
Andrew Dalke525eab32006-05-26 14:00:45 +00001613 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 Py_DECREF(list);
1615 return NULL;
1616}
1617
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001618PyDoc_STRVAR(partition__doc__,
1619"S.partition(sep) -> (head, sep, tail)\n\
1620\n\
1621Searches for the separator sep in S, and returns the part before it,\n\
1622the separator itself, and the part after it. If the separator is not\n\
1623found, returns S and two empty strings.");
1624
1625static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001626string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001627{
1628 Py_ssize_t len = PyString_GET_SIZE(self), sep_len, pos;
1629 const char *str = PyString_AS_STRING(self), *sep;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001630 PyObject * out;
1631
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001632 if (PyString_Check(sep_obj)) {
1633 sep = PyString_AS_STRING(sep_obj);
1634 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001635 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001636#ifdef Py_USING_UNICODE
1637 else if (PyUnicode_Check(sep_obj))
1638 return PyUnicode_Partition((PyObject *)self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001639#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001640 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001641 return NULL;
1642
1643 if (sep_len == 0) {
1644 PyErr_SetString(PyExc_ValueError, "empty separator");
1645 return NULL;
1646 }
1647
1648 out = PyTuple_New(3);
1649 if (!out)
1650 return NULL;
1651
1652 pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH);
1653 if (pos < 0) {
1654 Py_INCREF(self);
1655 PyTuple_SET_ITEM(out, 0, (PyObject*) self);
1656 Py_INCREF(nullstring);
1657 PyTuple_SET_ITEM(out, 1, (PyObject*) nullstring);
1658 Py_INCREF(nullstring);
1659 PyTuple_SET_ITEM(out, 2, (PyObject*) nullstring);
1660 } else {
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001661 PyObject* obj;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001662 PyTuple_SET_ITEM(out, 0, PyString_FromStringAndSize(str, pos));
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001663 Py_INCREF(sep_obj);
1664 PyTuple_SET_ITEM(out, 1, sep_obj);
1665 pos += sep_len;
1666 obj = PyString_FromStringAndSize(str + pos, len - pos);
1667 PyTuple_SET_ITEM(out, 2, obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001668 if (PyErr_Occurred()) {
1669 Py_DECREF(out);
1670 return NULL;
1671 }
1672 }
1673
1674 return out;
1675}
1676
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001677static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001678rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679{
Andrew Dalke525eab32006-05-26 14:00:45 +00001680 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001681 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001682 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001683
1684 if (list == NULL)
1685 return NULL;
1686
Andrew Dalke02758d62006-05-26 15:21:01 +00001687 i = j = len-1;
1688
1689 while (maxsplit-- > 0) {
1690 RSKIP_SPACE(s, i);
1691 if (i<0) break;
1692 j = i; i--;
1693 RSKIP_NONSPACE(s, i);
1694 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001695 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001696 if (i >= 0) {
1697 /* Only occurs when maxsplit was reached */
1698 /* Skip any remaining whitespace and copy to beginning of string */
1699 RSKIP_SPACE(s, i);
1700 if (i >= 0)
1701 SPLIT_ADD(s, 0, i + 1);
1702
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001703 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001704 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001705 if (PyList_Reverse(list) < 0)
1706 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001707 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001708 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001709 Py_DECREF(list);
1710 return NULL;
1711}
1712
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001713static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001714rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001715{
Andrew Dalke525eab32006-05-26 14:00:45 +00001716 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001717 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001718 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001719
1720 if (list == NULL)
1721 return NULL;
1722
1723 for (i = j = len - 1; i >= 0; ) {
1724 if (s[i] == ch) {
1725 if (maxcount-- <= 0)
1726 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001727 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001728 j = i = i - 1;
1729 } else
1730 i--;
1731 }
1732 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001733 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001734 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001735 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001736 if (PyList_Reverse(list) < 0)
1737 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001738 return list;
1739
1740 onError:
1741 Py_DECREF(list);
1742 return NULL;
1743}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001744
1745PyDoc_STRVAR(rsplit__doc__,
1746"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1747\n\
1748Return a list of the words in the string S, using sep as the\n\
1749delimiter string, starting at the end of the string and working\n\
1750to the front. If maxsplit is given, at most maxsplit splits are\n\
1751done. If sep is not specified or is None, any whitespace string\n\
1752is a separator.");
1753
1754static PyObject *
1755string_rsplit(PyStringObject *self, PyObject *args)
1756{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001757 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001758 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001759 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001760 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001761
Martin v. Löwis9c830762006-04-13 08:37:17 +00001762 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001763 return NULL;
1764 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001765 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001766 if (subobj == Py_None)
1767 return rsplit_whitespace(s, len, maxsplit);
1768 if (PyString_Check(subobj)) {
1769 sub = PyString_AS_STRING(subobj);
1770 n = PyString_GET_SIZE(subobj);
1771 }
1772#ifdef Py_USING_UNICODE
1773 else if (PyUnicode_Check(subobj))
1774 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1775#endif
1776 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1777 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001778
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001779 if (n == 0) {
1780 PyErr_SetString(PyExc_ValueError, "empty separator");
1781 return NULL;
1782 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001783 else if (n == 1)
1784 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001785
Andrew Dalke525eab32006-05-26 14:00:45 +00001786 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001787 if (list == NULL)
1788 return NULL;
1789
1790 j = len;
1791 i = j - n;
1792 while (i >= 0) {
1793 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1794 if (maxsplit-- <= 0)
1795 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001796 SPLIT_ADD(s, i+n, j);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001797 j = i;
1798 i -= n;
1799 }
1800 else
1801 i--;
1802 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001803 SPLIT_ADD(s, 0, j);
1804 FIX_PREALLOC_SIZE(list);
1805 if (PyList_Reverse(list) < 0)
1806 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001807 return list;
1808
Andrew Dalke525eab32006-05-26 14:00:45 +00001809onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001810 Py_DECREF(list);
1811 return NULL;
1812}
1813
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001815PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816"S.join(sequence) -> string\n\
1817\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001818Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001819sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820
1821static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001822string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823{
1824 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001825 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001828 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001829 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001830 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001831 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832
Tim Peters19fe14e2001-01-19 03:03:47 +00001833 seq = PySequence_Fast(orig, "");
1834 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001835 return NULL;
1836 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001837
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001838 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001839 if (seqlen == 0) {
1840 Py_DECREF(seq);
1841 return PyString_FromString("");
1842 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001844 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001845 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1846 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001847 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001848 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001849 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001851
Raymond Hettinger674f2412004-08-23 23:23:54 +00001852 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001853 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001854 * Do a pre-pass to figure out the total amount of space we'll
1855 * need (sz), see whether any argument is absurd, and defer to
1856 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001857 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001858 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001859 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001860 item = PySequence_Fast_GET_ITEM(seq, i);
1861 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001862#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001863 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001864 /* Defer to Unicode join.
1865 * CAUTION: There's no gurantee that the
1866 * original sequence can be iterated over
1867 * again, so we must pass seq here.
1868 */
1869 PyObject *result;
1870 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001871 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001872 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001873 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001874#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001875 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001876 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001877 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001878 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001879 Py_DECREF(seq);
1880 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001881 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001882 sz += PyString_GET_SIZE(item);
1883 if (i != 0)
1884 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001885 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001886 PyErr_SetString(PyExc_OverflowError,
1887 "join() is too long for a Python string");
1888 Py_DECREF(seq);
1889 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001891 }
1892
1893 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001894 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001895 if (res == NULL) {
1896 Py_DECREF(seq);
1897 return NULL;
1898 }
1899
1900 /* Catenate everything. */
1901 p = PyString_AS_STRING(res);
1902 for (i = 0; i < seqlen; ++i) {
1903 size_t n;
1904 item = PySequence_Fast_GET_ITEM(seq, i);
1905 n = PyString_GET_SIZE(item);
1906 memcpy(p, PyString_AS_STRING(item), n);
1907 p += n;
1908 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001909 memcpy(p, sep, seplen);
1910 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001911 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001913
Jeremy Hylton49048292000-07-11 03:28:17 +00001914 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916}
1917
Tim Peters52e155e2001-06-16 05:42:57 +00001918PyObject *
1919_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001920{
Tim Petersa7259592001-06-16 05:11:17 +00001921 assert(sep != NULL && PyString_Check(sep));
1922 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001923 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001924}
1925
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001926static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001928{
1929 if (*end > len)
1930 *end = len;
1931 else if (*end < 0)
1932 *end += len;
1933 if (*end < 0)
1934 *end = 0;
1935 if (*start < 0)
1936 *start += len;
1937 if (*start < 0)
1938 *start = 0;
1939}
1940
Martin v. Löwis18e16552006-02-15 17:27:45 +00001941static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001942string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001944 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001945 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001946 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001947 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948
Martin v. Löwis18e16552006-02-15 17:27:45 +00001949 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001950 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001951 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001952 return -2;
1953 if (PyString_Check(subobj)) {
1954 sub = PyString_AS_STRING(subobj);
1955 n = PyString_GET_SIZE(subobj);
1956 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001957#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001958 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001959 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001960#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001961 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962 return -2;
1963
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001964 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001966#ifdef USE_FAST
1967 if (n == 0)
1968 return (dir > 0) ? i : last;
1969 if (dir > 0) {
1970 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1971 FAST_SEARCH);
1972 if (pos < 0)
1973 return pos;
1974 return pos + i;
1975 }
1976#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977 if (dir > 0) {
1978 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001980 last -= n;
1981 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001982 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001983 return (long)i;
1984 }
1985 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001986 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001987
Guido van Rossum4c08d552000-03-10 22:55:18 +00001988 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001989 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001990 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001991 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001992 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001993 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001994
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995 return -1;
1996}
1997
1998
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001999PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000"S.find(sub [,start [,end]]) -> int\n\
2001\n\
2002Return the lowest index in S where substring sub is found,\n\
2003such that sub is contained within s[start,end]. Optional\n\
2004arguments start and end are interpreted as in slice notation.\n\
2005\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002006Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007
2008static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002009string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 if (result == -2)
2013 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002014 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015}
2016
2017
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002018PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019"S.index(sub [,start [,end]]) -> int\n\
2020\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002021Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022
2023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002024string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002026 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027 if (result == -2)
2028 return NULL;
2029 if (result == -1) {
2030 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002031 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032 return NULL;
2033 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002034 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035}
2036
2037
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002038PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039"S.rfind(sub [,start [,end]]) -> int\n\
2040\n\
2041Return the highest index in S where substring sub is found,\n\
2042such that sub is contained within s[start,end]. Optional\n\
2043arguments start and end are interpreted as in slice notation.\n\
2044\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002045Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046
2047static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002048string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002050 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051 if (result == -2)
2052 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002053 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054}
2055
2056
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002057PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058"S.rindex(sub [,start [,end]]) -> int\n\
2059\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061
2062static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002063string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002065 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066 if (result == -2)
2067 return NULL;
2068 if (result == -1) {
2069 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002070 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071 return NULL;
2072 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002073 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002074}
2075
2076
2077static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002078do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2079{
2080 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002081 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002082 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002083 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2084 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002085
2086 i = 0;
2087 if (striptype != RIGHTSTRIP) {
2088 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2089 i++;
2090 }
2091 }
2092
2093 j = len;
2094 if (striptype != LEFTSTRIP) {
2095 do {
2096 j--;
2097 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2098 j++;
2099 }
2100
2101 if (i == 0 && j == len && PyString_CheckExact(self)) {
2102 Py_INCREF(self);
2103 return (PyObject*)self;
2104 }
2105 else
2106 return PyString_FromStringAndSize(s+i, j-i);
2107}
2108
2109
2110static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002111do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112{
2113 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002114 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116 i = 0;
2117 if (striptype != RIGHTSTRIP) {
2118 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2119 i++;
2120 }
2121 }
2122
2123 j = len;
2124 if (striptype != LEFTSTRIP) {
2125 do {
2126 j--;
2127 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2128 j++;
2129 }
2130
Tim Peters8fa5dd02001-09-12 02:18:30 +00002131 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 Py_INCREF(self);
2133 return (PyObject*)self;
2134 }
2135 else
2136 return PyString_FromStringAndSize(s+i, j-i);
2137}
2138
2139
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002140static PyObject *
2141do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2142{
2143 PyObject *sep = NULL;
2144
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002145 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002146 return NULL;
2147
2148 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002149 if (PyString_Check(sep))
2150 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002151#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002152 else if (PyUnicode_Check(sep)) {
2153 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2154 PyObject *res;
2155 if (uniself==NULL)
2156 return NULL;
2157 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2158 striptype, sep);
2159 Py_DECREF(uniself);
2160 return res;
2161 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002162#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002163 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002164#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002165 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002166#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002167 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002168#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002169 STRIPNAME(striptype));
2170 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002171 }
2172
2173 return do_strip(self, striptype);
2174}
2175
2176
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002177PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002178"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179\n\
2180Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002181whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002182If chars is given and not None, remove characters in chars instead.\n\
2183If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184
2185static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002186string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002188 if (PyTuple_GET_SIZE(args) == 0)
2189 return do_strip(self, BOTHSTRIP); /* Common case */
2190 else
2191 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192}
2193
2194
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002195PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002196"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002198Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002199If chars is given and not None, remove characters in chars instead.\n\
2200If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002201
2202static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002203string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002205 if (PyTuple_GET_SIZE(args) == 0)
2206 return do_strip(self, LEFTSTRIP); /* Common case */
2207 else
2208 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209}
2210
2211
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002213"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002215Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002216If chars is given and not None, remove characters in chars instead.\n\
2217If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218
2219static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002220string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002222 if (PyTuple_GET_SIZE(args) == 0)
2223 return do_strip(self, RIGHTSTRIP); /* Common case */
2224 else
2225 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226}
2227
2228
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002229PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230"S.lower() -> string\n\
2231\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002232Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002234/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2235#ifndef _tolower
2236#define _tolower tolower
2237#endif
2238
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002240string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002242 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002243 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002244 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002246 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002247 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002249
2250 s = PyString_AS_STRING(newobj);
2251
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002252 memcpy(s, PyString_AS_STRING(self), n);
2253
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002255 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002256 if (isupper(c))
2257 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002259
Anthony Baxtera6286212006-04-11 07:42:36 +00002260 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261}
2262
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002263PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264"S.upper() -> string\n\
2265\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002266Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002268#ifndef _toupper
2269#define _toupper toupper
2270#endif
2271
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002273string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002275 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002276 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002277 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002279 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002280 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002282
2283 s = PyString_AS_STRING(newobj);
2284
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002285 memcpy(s, PyString_AS_STRING(self), n);
2286
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002288 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002289 if (islower(c))
2290 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002292
Anthony Baxtera6286212006-04-11 07:42:36 +00002293 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294}
2295
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002296PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297"S.title() -> string\n\
2298\n\
2299Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002300characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301
2302static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002303string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304{
2305 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002306 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002308 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309
Anthony Baxtera6286212006-04-11 07:42:36 +00002310 newobj = PyString_FromStringAndSize(NULL, n);
2311 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002313 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 for (i = 0; i < n; i++) {
2315 int c = Py_CHARMASK(*s++);
2316 if (islower(c)) {
2317 if (!previous_is_cased)
2318 c = toupper(c);
2319 previous_is_cased = 1;
2320 } else if (isupper(c)) {
2321 if (previous_is_cased)
2322 c = tolower(c);
2323 previous_is_cased = 1;
2324 } else
2325 previous_is_cased = 0;
2326 *s_new++ = c;
2327 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002328 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002329}
2330
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002331PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332"S.capitalize() -> string\n\
2333\n\
2334Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002335capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336
2337static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002338string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339{
2340 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002341 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002342 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343
Anthony Baxtera6286212006-04-11 07:42:36 +00002344 newobj = PyString_FromStringAndSize(NULL, n);
2345 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002347 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002348 if (0 < n) {
2349 int c = Py_CHARMASK(*s++);
2350 if (islower(c))
2351 *s_new = toupper(c);
2352 else
2353 *s_new = c;
2354 s_new++;
2355 }
2356 for (i = 1; i < n; i++) {
2357 int c = Py_CHARMASK(*s++);
2358 if (isupper(c))
2359 *s_new = tolower(c);
2360 else
2361 *s_new = c;
2362 s_new++;
2363 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002364 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365}
2366
2367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002368PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369"S.count(sub[, start[, end]]) -> int\n\
2370\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002371Return the number of non-overlapping occurrences of substring sub in\n\
2372string S[start:end]. Optional arguments start and end are interpreted\n\
2373as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374
2375static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002376string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002378 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002379 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002380 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002381 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383
Guido van Rossumc6821402000-05-08 14:08:05 +00002384 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2385 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002387
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 if (PyString_Check(subobj)) {
2389 sub = PyString_AS_STRING(subobj);
2390 n = PyString_GET_SIZE(subobj);
2391 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002392#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002393 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002394 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002395 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2396 if (count == -1)
2397 return NULL;
2398 else
2399 return PyInt_FromLong((long) count);
2400 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002401#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2403 return NULL;
2404
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002405 string_adjust_indices(&i, &last, len);
2406
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407 m = last + 1 - n;
2408 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002409 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410
Fredrik Lundhaf722372006-05-25 17:55:31 +00002411#ifdef USE_FAST
2412 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2413 if (r < 0)
2414 r = 0; /* no match */
2415#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416 r = 0;
2417 while (i < m) {
Fredrik Lundhaf722372006-05-25 17:55:31 +00002418 const char *t
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419 if (!memcmp(s+i, sub, n)) {
2420 r++;
2421 i += n;
2422 } else {
2423 i++;
2424 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002425 if (i >= m)
2426 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002427 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002428 if (t == NULL)
2429 break;
2430 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002432#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002433 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434}
2435
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002436PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437"S.swapcase() -> string\n\
2438\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002439Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002440converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441
2442static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002443string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444{
2445 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002446 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002447 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002448
Anthony Baxtera6286212006-04-11 07:42:36 +00002449 newobj = PyString_FromStringAndSize(NULL, n);
2450 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002451 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002452 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453 for (i = 0; i < n; i++) {
2454 int c = Py_CHARMASK(*s++);
2455 if (islower(c)) {
2456 *s_new = toupper(c);
2457 }
2458 else if (isupper(c)) {
2459 *s_new = tolower(c);
2460 }
2461 else
2462 *s_new = c;
2463 s_new++;
2464 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002465 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002466}
2467
2468
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002469PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002470"S.translate(table [,deletechars]) -> string\n\
2471\n\
2472Return a copy of the string S, where all characters occurring\n\
2473in the optional argument deletechars are removed, and the\n\
2474remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002475translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476
2477static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002478string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002479{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002480 register char *input, *output;
2481 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002482 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002485 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002486 PyObject *result;
2487 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002488 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002490 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002491 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002493
2494 if (PyString_Check(tableobj)) {
2495 table1 = PyString_AS_STRING(tableobj);
2496 tablen = PyString_GET_SIZE(tableobj);
2497 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002498#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002499 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002500 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002501 parameter; instead a mapping to None will cause characters
2502 to be deleted. */
2503 if (delobj != NULL) {
2504 PyErr_SetString(PyExc_TypeError,
2505 "deletions are implemented differently for unicode");
2506 return NULL;
2507 }
2508 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2509 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002510#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002511 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513
Martin v. Löwis00b61272002-12-12 20:03:19 +00002514 if (tablen != 256) {
2515 PyErr_SetString(PyExc_ValueError,
2516 "translation table must be 256 characters long");
2517 return NULL;
2518 }
2519
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520 if (delobj != NULL) {
2521 if (PyString_Check(delobj)) {
2522 del_table = PyString_AS_STRING(delobj);
2523 dellen = PyString_GET_SIZE(delobj);
2524 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002525#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 else if (PyUnicode_Check(delobj)) {
2527 PyErr_SetString(PyExc_TypeError,
2528 "deletions are implemented differently for unicode");
2529 return NULL;
2530 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002531#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2533 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534 }
2535 else {
2536 del_table = NULL;
2537 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002538 }
2539
2540 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002541 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002542 result = PyString_FromStringAndSize((char *)NULL, inlen);
2543 if (result == NULL)
2544 return NULL;
2545 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002546 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547
2548 if (dellen == 0) {
2549 /* If no deletions are required, use faster code */
2550 for (i = inlen; --i >= 0; ) {
2551 c = Py_CHARMASK(*input++);
2552 if (Py_CHARMASK((*output++ = table[c])) != c)
2553 changed = 1;
2554 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002555 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002556 return result;
2557 Py_DECREF(result);
2558 Py_INCREF(input_obj);
2559 return input_obj;
2560 }
2561
2562 for (i = 0; i < 256; i++)
2563 trans_table[i] = Py_CHARMASK(table[i]);
2564
2565 for (i = 0; i < dellen; i++)
2566 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2567
2568 for (i = inlen; --i >= 0; ) {
2569 c = Py_CHARMASK(*input++);
2570 if (trans_table[c] != -1)
2571 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2572 continue;
2573 changed = 1;
2574 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002575 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576 Py_DECREF(result);
2577 Py_INCREF(input_obj);
2578 return input_obj;
2579 }
2580 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002581 if (inlen > 0)
2582 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002583 return result;
2584}
2585
2586
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* True if `pattern` (of `length` bytes) occurs in `target` at `offset`.
   Compares the first and last bytes before falling back to memcmp() for
   the interior.
   Don't call if length < 2: the memcmp() size would be negative.
   NOTE: arguments are evaluated more than once; avoid side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* Pointer to the first occurrence of byte `c` in the first `target_len`
   bytes of `target`, or NULL if there is none. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2600
2601/* String ops must return a string. */
2602/* If the object is subclass of string, create a copy */
2603static PyStringObject *
2604return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002606 if (PyString_CheckExact(self)) {
2607 Py_INCREF(self);
2608 return self;
2609 }
2610 return (PyStringObject *)PyString_FromStringAndSize(
2611 PyString_AS_STRING(self),
2612 PyString_GET_SIZE(self));
2613}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002615static Py_ssize_t
2616countchar(char *target, int target_len, char c)
2617{
2618 Py_ssize_t count=0;
2619 char *start=target;
2620 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002621
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002622 while ( (start=findchar(start, end-start, c)) != NULL ) {
2623 count++;
2624 start += 1;
2625 }
2626
2627 return count;
2628}
2629
2630static Py_ssize_t
2631findstring(char *target, Py_ssize_t target_len,
2632 char *pattern, Py_ssize_t pattern_len,
2633 Py_ssize_t start,
2634 Py_ssize_t end,
2635 int direction)
2636{
2637 if (start < 0) {
2638 start += target_len;
2639 if (start < 0)
2640 start = 0;
2641 }
2642 if (end > target_len) {
2643 end = target_len;
2644 } else if (end < 0) {
2645 end += target_len;
2646 if (end < 0)
2647 end = 0;
2648 }
2649
2650 /* zero-length substrings always match at the first attempt */
2651 if (pattern_len == 0)
2652 return (direction > 0) ? start : end;
2653
2654 end -= pattern_len;
2655
2656 if (direction < 0) {
2657 for (; end >= start; end--)
2658 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2659 return end;
2660 } else {
2661 for (; start <= end; start++)
2662 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2663 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002664 }
2665 return -1;
2666}
2667
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002668Py_ssize_t
2669countstring(char *target, Py_ssize_t target_len,
2670 char *pattern, Py_ssize_t pattern_len,
2671 Py_ssize_t start,
2672 Py_ssize_t end,
2673 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002674{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002675 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002676
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002677 if (start < 0) {
2678 start += target_len;
2679 if (start < 0)
2680 start = 0;
2681 }
2682 if (end > target_len) {
2683 end = target_len;
2684 } else if (end < 0) {
2685 end += target_len;
2686 if (end < 0)
2687 end = 0;
2688 }
2689
2690 /* zero-length substrings match everywhere */
2691 if (pattern_len == 0)
2692 return target_len+1;
2693
2694 end -= pattern_len;
2695
2696 if (direction < 0) {
2697 for (; end >= start; end--)
2698 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2699 count++;
2700 end -= pattern_len-1;
2701 }
2702 } else {
2703 for (; start <= end; start++)
2704 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2705 count++;
2706 start += pattern_len-1;
2707 }
2708 }
2709 return count;
2710}
2711
2712
/* Algorithms for different cases of string replacement */
2714
2715/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2716static PyStringObject *
2717replace_interleave(PyStringObject *self,
2718 PyStringObject *to,
2719 Py_ssize_t maxcount)
2720{
2721 char *self_s, *to_s, *result_s;
2722 Py_ssize_t self_len, to_len, result_len;
2723 Py_ssize_t count, i, product;
2724 PyStringObject *result;
2725
2726 self_len = PyString_GET_SIZE(self);
2727 to_len = PyString_GET_SIZE(to);
2728
2729 /* 1 at the end plus 1 after every character */
2730 count = self_len+1;
2731 if (maxcount < count)
2732 count = maxcount;
2733
2734 /* Check for overflow */
2735 /* result_len = count * to_len + self_len; */
2736 product = count * to_len;
2737 if (product / to_len != count) {
2738 PyErr_SetString(PyExc_OverflowError,
2739 "replace string is too long");
2740 return NULL;
2741 }
2742 result_len = product + self_len;
2743 if (result_len < 0) {
2744 PyErr_SetString(PyExc_OverflowError,
2745 "replace string is too long");
2746 return NULL;
2747 }
2748
2749 if (! (result = (PyStringObject *)
2750 PyString_FromStringAndSize(NULL, result_len)) )
2751 return NULL;
2752
2753 self_s = PyString_AS_STRING(self);
2754 to_s = PyString_AS_STRING(to);
2755 to_len = PyString_GET_SIZE(to);
2756 result_s = PyString_AS_STRING(result);
2757
2758 /* TODO: special case single character, which doesn't need memcpy */
2759
2760 /* Lay the first one down (guaranteed this will occur) */
2761 memcpy(result_s, to_s, to_len);
2762 result_s += to_len;
2763 count -= 1;
2764
2765 for (i=0; i<count; i++) {
2766 *result_s++ = *self_s++;
2767 memcpy(result_s, to_s, to_len);
2768 result_s += to_len;
2769 }
2770
2771 /* Copy the rest of the original string */
2772 memcpy(result_s, self_s, self_len-i);
2773
2774 return result;
2775}
2776
2777/* Special case for deleting a single character */
2778/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2779static PyStringObject *
2780replace_delete_single_character(PyStringObject *self,
2781 char from_c, Py_ssize_t maxcount)
2782{
2783 char *self_s, *result_s;
2784 char *start, *next, *end;
2785 Py_ssize_t self_len, result_len;
2786 Py_ssize_t count;
2787 PyStringObject *result;
2788
2789 self_len = PyString_GET_SIZE(self);
2790 self_s = PyString_AS_STRING(self);
2791
2792 count = countchar(self_s, self_len, from_c);
2793 if (count == 0) {
2794 return return_self(self);
2795 }
2796 if (count > maxcount)
2797 count = maxcount;
2798
2799 result_len = self_len - count; /* from_len == 1 */
2800 assert(result_len>=0);
2801
2802 if ( (result = (PyStringObject *)
2803 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2804 return NULL;
2805 result_s = PyString_AS_STRING(result);
2806
2807 start = self_s;
2808 end = self_s + self_len;
2809 while (count-- > 0) {
2810 next = findchar(start, end-start, from_c);
2811 if (next == NULL)
2812 break;
2813 memcpy(result_s, start, next-start);
2814 result_s += (next-start);
2815 start = next+1;
2816 }
2817 memcpy(result_s, start, end-start);
2818
2819 return result;
2820}
2821
2822/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2823
2824static PyStringObject *
2825replace_delete_substring(PyStringObject *self, PyStringObject *from,
2826 Py_ssize_t maxcount) {
2827 char *self_s, *from_s, *result_s;
2828 char *start, *next, *end;
2829 Py_ssize_t self_len, from_len, result_len;
2830 Py_ssize_t count, offset;
2831 PyStringObject *result;
2832
2833 self_len = PyString_GET_SIZE(self);
2834 self_s = PyString_AS_STRING(self);
2835 from_len = PyString_GET_SIZE(from);
2836 from_s = PyString_AS_STRING(from);
2837
2838 count = countstring(self_s, self_len,
2839 from_s, from_len,
2840 0, self_len, 1);
2841
2842 if (count > maxcount)
2843 count = maxcount;
2844
2845 if (count == 0) {
2846 /* no matches */
2847 return return_self(self);
2848 }
2849
2850 result_len = self_len - (count * from_len);
2851 assert (result_len>=0);
2852
2853 if ( (result = (PyStringObject *)
2854 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2855 return NULL;
2856
2857 result_s = PyString_AS_STRING(result);
2858
2859 start = self_s;
2860 end = self_s + self_len;
2861 while (count-- > 0) {
2862 offset = findstring(start, end-start,
2863 from_s, from_len,
2864 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002865 if (offset == -1)
2866 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 next = start + offset;
2868
2869 memcpy(result_s, start, next-start);
2870
2871 result_s += (next-start);
2872 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002873 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 memcpy(result_s, start, end-start);
2875 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002876}
2877
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002878/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2879static PyStringObject *
2880replace_single_character_in_place(PyStringObject *self,
2881 char from_c, char to_c,
2882 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002883{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002884 char *self_s, *result_s, *start, *end, *next;
2885 Py_ssize_t self_len;
2886 PyStringObject *result;
2887
2888 /* The result string will be the same size */
2889 self_s = PyString_AS_STRING(self);
2890 self_len = PyString_GET_SIZE(self);
2891
2892 next = findchar(self_s, self_len, from_c);
2893
2894 if (next == NULL) {
2895 /* No matches; return the original string */
2896 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002897 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898
2899 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002900 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901 if (result == NULL)
2902 return NULL;
2903 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002904 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002905
2906 /* change everything in-place, starting with this one */
2907 start = result_s + (next-self_s);
2908 *start = to_c;
2909 start++;
2910 end = result_s + self_len;
2911
2912 while (--maxcount > 0) {
2913 next = findchar(start, end-start, from_c);
2914 if (next == NULL)
2915 break;
2916 *next = to_c;
2917 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002918 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002919
2920 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002921}
2922
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002923/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2924static PyStringObject *
2925replace_substring_in_place(PyStringObject *self,
2926 PyStringObject *from,
2927 PyStringObject *to,
2928 Py_ssize_t maxcount)
2929{
2930 char *result_s, *start, *end;
2931 char *self_s, *from_s, *to_s;
2932 Py_ssize_t self_len, from_len, offset;
2933 PyStringObject *result;
2934
2935 /* The result string will be the same size */
2936
2937 self_s = PyString_AS_STRING(self);
2938 self_len = PyString_GET_SIZE(self);
2939
2940 from_s = PyString_AS_STRING(from);
2941 from_len = PyString_GET_SIZE(from);
2942 to_s = PyString_AS_STRING(to);
2943
2944 offset = findstring(self_s, self_len,
2945 from_s, from_len,
2946 0, self_len, FORWARD);
2947
2948 if (offset == -1) {
2949 /* No matches; return the original string */
2950 return return_self(self);
2951 }
2952
2953 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002954 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002955 if (result == NULL)
2956 return NULL;
2957 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002958 memcpy(result_s, self_s, self_len);
2959
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002960
2961 /* change everything in-place, starting with this one */
2962 start = result_s + offset;
2963 memcpy(start, to_s, from_len);
2964 start += from_len;
2965 end = result_s + self_len;
2966
2967 while ( --maxcount > 0) {
2968 offset = findstring(start, end-start,
2969 from_s, from_len,
2970 0, end-start, FORWARD);
2971 if (offset==-1)
2972 break;
2973 memcpy(start+offset, to_s, from_len);
2974 start += offset+from_len;
2975 }
2976
2977 return result;
2978}
2979
2980/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2981static PyStringObject *
2982replace_single_character(PyStringObject *self,
2983 char from_c,
2984 PyStringObject *to,
2985 Py_ssize_t maxcount)
2986{
2987 char *self_s, *to_s, *result_s;
2988 char *start, *next, *end;
2989 Py_ssize_t self_len, to_len, result_len;
2990 Py_ssize_t count, product;
2991 PyStringObject *result;
2992
2993 self_s = PyString_AS_STRING(self);
2994 self_len = PyString_GET_SIZE(self);
2995
2996 count = countchar(self_s, self_len, from_c);
2997 if (count > maxcount)
2998 count = maxcount;
2999
3000 if (count == 0) {
3001 /* no matches, return unchanged */
3002 return return_self(self);
3003 }
3004
3005 to_s = PyString_AS_STRING(to);
3006 to_len = PyString_GET_SIZE(to);
3007
3008 /* use the difference between current and new, hence the "-1" */
3009 /* result_len = self_len + count * (to_len-1) */
3010 product = count * (to_len-1);
3011 if (product / (to_len-1) != count) {
3012 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3013 return NULL;
3014 }
3015 result_len = self_len + product;
3016 if (result_len < 0) {
3017 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3018 return NULL;
3019 }
3020
3021 if ( (result = (PyStringObject *)
3022 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3023 return NULL;
3024 result_s = PyString_AS_STRING(result);
3025
3026 start = self_s;
3027 end = self_s + self_len;
3028 while (count-- > 0) {
3029 next = findchar(start, end-start, from_c);
3030 if (next == NULL)
3031 break;
3032
3033 if (next == start) {
3034 /* replace with the 'to' */
3035 memcpy(result_s, to_s, to_len);
3036 result_s += to_len;
3037 start += 1;
3038 } else {
3039 /* copy the unchanged old then the 'to' */
3040 memcpy(result_s, start, next-start);
3041 result_s += (next-start);
3042 memcpy(result_s, to_s, to_len);
3043 result_s += to_len;
3044 start = next+1;
3045 }
3046 }
3047 /* Copy the remainder of the remaining string */
3048 memcpy(result_s, start, end-start);
3049
3050 return result;
3051}
3052
3053/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
3054static PyStringObject *
3055replace_substring(PyStringObject *self,
3056 PyStringObject *from,
3057 PyStringObject *to,
3058 Py_ssize_t maxcount) {
3059 char *self_s, *from_s, *to_s, *result_s;
3060 char *start, *next, *end;
3061 Py_ssize_t self_len, from_len, to_len, result_len;
3062 Py_ssize_t count, offset, product;
3063 PyStringObject *result;
3064
3065 self_s = PyString_AS_STRING(self);
3066 self_len = PyString_GET_SIZE(self);
3067 from_s = PyString_AS_STRING(from);
3068 from_len = PyString_GET_SIZE(from);
3069
3070 count = countstring(self_s, self_len,
3071 from_s, from_len,
3072 0, self_len, FORWARD);
3073 if (count > maxcount)
3074 count = maxcount;
3075
3076 if (count == 0) {
3077 /* no matches, return unchanged */
3078 return return_self(self);
3079 }
3080
3081 to_s = PyString_AS_STRING(to);
3082 to_len = PyString_GET_SIZE(to);
3083
3084 /* Check for overflow */
3085 /* result_len = self_len + count * (to_len-from_len) */
3086 product = count * (to_len-from_len);
3087 if (product / (to_len-from_len) != count) {
3088 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3089 return NULL;
3090 }
3091 result_len = self_len + product;
3092 if (result_len < 0) {
3093 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3094 return NULL;
3095 }
3096
3097 if ( (result = (PyStringObject *)
3098 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3099 return NULL;
3100 result_s = PyString_AS_STRING(result);
3101
3102 start = self_s;
3103 end = self_s + self_len;
3104 while (count-- > 0) {
3105 offset = findstring(start, end-start,
3106 from_s, from_len,
3107 0, end-start, FORWARD);
3108 if (offset == -1)
3109 break;
3110 next = start+offset;
3111 if (next == start) {
3112 /* replace with the 'to' */
3113 memcpy(result_s, to_s, to_len);
3114 result_s += to_len;
3115 start += from_len;
3116 } else {
3117 /* copy the unchanged old then the 'to' */
3118 memcpy(result_s, start, next-start);
3119 result_s += (next-start);
3120 memcpy(result_s, to_s, to_len);
3121 result_s += to_len;
3122 start = next+from_len;
3123 }
3124 }
3125 /* Copy the remainder of the remaining string */
3126 memcpy(result_s, start, end-start);
3127
3128 return result;
3129}
3130
3131
3132static PyStringObject *
3133replace(PyStringObject *self,
3134 PyStringObject *from,
3135 PyStringObject *to,
3136 Py_ssize_t maxcount)
3137{
3138 Py_ssize_t from_len, to_len;
3139
3140 if (maxcount < 0) {
3141 maxcount = PY_SSIZE_T_MAX;
3142 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3143 /* nothing to do; return the original string */
3144 return return_self(self);
3145 }
3146
3147 from_len = PyString_GET_SIZE(from);
3148 to_len = PyString_GET_SIZE(to);
3149
3150 if (maxcount == 0 ||
3151 (from_len == 0 && to_len == 0)) {
3152 /* nothing to do; return the original string */
3153 return return_self(self);
3154 }
3155
3156 /* Handle zero-length special cases */
3157
3158 if (from_len == 0) {
3159 /* insert the 'to' string everywhere. */
3160 /* >>> "Python".replace("", ".") */
3161 /* '.P.y.t.h.o.n.' */
3162 return replace_interleave(self, to, maxcount);
3163 }
3164
3165 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3166 /* point for an empty self string to generate a non-empty string */
3167 /* Special case so the remaining code always gets a non-empty string */
3168 if (PyString_GET_SIZE(self) == 0) {
3169 return return_self(self);
3170 }
3171
3172 if (to_len == 0) {
3173 /* delete all occurances of 'from' string */
3174 if (from_len == 1) {
3175 return replace_delete_single_character(
3176 self, PyString_AS_STRING(from)[0], maxcount);
3177 } else {
3178 return replace_delete_substring(self, from, maxcount);
3179 }
3180 }
3181
3182 /* Handle special case where both strings have the same length */
3183
3184 if (from_len == to_len) {
3185 if (from_len == 1) {
3186 return replace_single_character_in_place(
3187 self,
3188 PyString_AS_STRING(from)[0],
3189 PyString_AS_STRING(to)[0],
3190 maxcount);
3191 } else {
3192 return replace_substring_in_place(
3193 self, from, to, maxcount);
3194 }
3195 }
3196
3197 /* Otherwise use the more generic algorithms */
3198 if (from_len == 1) {
3199 return replace_single_character(self, PyString_AS_STRING(from)[0],
3200 to, maxcount);
3201 } else {
3202 /* len('from')>=2, len('to')>=1 */
3203 return replace_substring(self, from, to, maxcount);
3204 }
3205}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003206
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003207PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003208"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003209\n\
3210Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003211old replaced by new. If the optional argument count is\n\
3212given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213
3214static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003215string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003217 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003218 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003219 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003220 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003221
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003222 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003223 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003224
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003225 if (PyString_Check(from)) {
3226 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003227 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003228#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003229 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003230 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003231 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003232#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003233 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003234 return NULL;
3235
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003236 if (PyString_Check(to)) {
3237 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003238 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003239#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003240 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003241 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003242 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003243#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003244 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003245 return NULL;
3246
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003247 return (PyObject *)replace((PyStringObject *) self,
3248 (PyStringObject *) from,
3249 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003250}
3251
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003252/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003253
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003254PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003255"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003256\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003257Return True if S starts with the specified prefix, False otherwise.\n\
3258With optional start, test S beginning at that position.\n\
3259With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003260
3261static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003262string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003263{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003264 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003265 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003266 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003267 Py_ssize_t plen;
3268 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003269 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003271
Guido van Rossumc6821402000-05-08 14:08:05 +00003272 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3273 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003274 return NULL;
3275 if (PyString_Check(subobj)) {
3276 prefix = PyString_AS_STRING(subobj);
3277 plen = PyString_GET_SIZE(subobj);
3278 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003279#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003280 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003281 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003282 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003283 subobj, start, end, -1);
3284 if (rc == -1)
3285 return NULL;
3286 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003287 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003288 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003289#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003290 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003291 return NULL;
3292
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003293 string_adjust_indices(&start, &end, len);
3294
3295 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003296 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003297
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003298 if (end-start >= plen)
3299 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3300 else
3301 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003302}
3303
3304
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003305PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003306"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003307\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003308Return True if S ends with the specified suffix, False otherwise.\n\
3309With optional start, test S beginning at that position.\n\
3310With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003311
3312static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003313string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003314{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003315 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003316 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003317 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003318 Py_ssize_t slen;
3319 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003320 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003321 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003322
Guido van Rossumc6821402000-05-08 14:08:05 +00003323 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3324 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003325 return NULL;
3326 if (PyString_Check(subobj)) {
3327 suffix = PyString_AS_STRING(subobj);
3328 slen = PyString_GET_SIZE(subobj);
3329 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003330#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003331 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003332 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003333 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003334 subobj, start, end, +1);
3335 if (rc == -1)
3336 return NULL;
3337 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003338 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003339 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003340#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003341 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003342 return NULL;
3343
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003344 string_adjust_indices(&start, &end, len);
3345
3346 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003347 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003348
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003349 if (end-slen > start)
3350 start = end - slen;
3351 if (end-start >= slen)
3352 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3353 else
3354 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003355}
3356
3357
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003358PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003359"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003360\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003361Encodes S using the codec registered for encoding. encoding defaults\n\
3362to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003363handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003364a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3365'xmlcharrefreplace' as well as any other name registered with\n\
3366codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003367
3368static PyObject *
3369string_encode(PyStringObject *self, PyObject *args)
3370{
3371 char *encoding = NULL;
3372 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003373 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003374
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003375 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3376 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003377 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003378 if (v == NULL)
3379 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003380 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3381 PyErr_Format(PyExc_TypeError,
3382 "encoder did not return a string/unicode object "
3383 "(type=%.400s)",
3384 v->ob_type->tp_name);
3385 Py_DECREF(v);
3386 return NULL;
3387 }
3388 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003389
3390 onError:
3391 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003392}
3393
3394
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003395PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003396"S.decode([encoding[,errors]]) -> object\n\
3397\n\
3398Decodes S using the codec registered for encoding. encoding defaults\n\
3399to the default encoding. errors may be given to set a different error\n\
3400handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003401a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3402as well as any other name registerd with codecs.register_error that is\n\
3403able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003404
3405static PyObject *
3406string_decode(PyStringObject *self, PyObject *args)
3407{
3408 char *encoding = NULL;
3409 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003410 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003411
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003412 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3413 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003414 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003415 if (v == NULL)
3416 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003417 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3418 PyErr_Format(PyExc_TypeError,
3419 "decoder did not return a string/unicode object "
3420 "(type=%.400s)",
3421 v->ob_type->tp_name);
3422 Py_DECREF(v);
3423 return NULL;
3424 }
3425 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003426
3427 onError:
3428 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003429}
3430
3431
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003432PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433"S.expandtabs([tabsize]) -> string\n\
3434\n\
3435Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003436If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003437
3438static PyObject*
3439string_expandtabs(PyStringObject *self, PyObject *args)
3440{
3441 const char *e, *p;
3442 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003443 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003444 PyObject *u;
3445 int tabsize = 8;
3446
3447 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3448 return NULL;
3449
Thomas Wouters7e474022000-07-16 12:04:32 +00003450 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003451 i = j = 0;
3452 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3453 for (p = PyString_AS_STRING(self); p < e; p++)
3454 if (*p == '\t') {
3455 if (tabsize > 0)
3456 j += tabsize - (j % tabsize);
3457 }
3458 else {
3459 j++;
3460 if (*p == '\n' || *p == '\r') {
3461 i += j;
3462 j = 0;
3463 }
3464 }
3465
3466 /* Second pass: create output string and fill it */
3467 u = PyString_FromStringAndSize(NULL, i + j);
3468 if (!u)
3469 return NULL;
3470
3471 j = 0;
3472 q = PyString_AS_STRING(u);
3473
3474 for (p = PyString_AS_STRING(self); p < e; p++)
3475 if (*p == '\t') {
3476 if (tabsize > 0) {
3477 i = tabsize - (j % tabsize);
3478 j += i;
3479 while (i--)
3480 *q++ = ' ';
3481 }
3482 }
3483 else {
3484 j++;
3485 *q++ = *p;
3486 if (*p == '\n' || *p == '\r')
3487 j = 0;
3488 }
3489
3490 return u;
3491}
3492
Tim Peters8fa5dd02001-09-12 02:18:30 +00003493static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003494pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003495{
3496 PyObject *u;
3497
3498 if (left < 0)
3499 left = 0;
3500 if (right < 0)
3501 right = 0;
3502
Tim Peters8fa5dd02001-09-12 02:18:30 +00003503 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003504 Py_INCREF(self);
3505 return (PyObject *)self;
3506 }
3507
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003508 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509 left + PyString_GET_SIZE(self) + right);
3510 if (u) {
3511 if (left)
3512 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003513 memcpy(PyString_AS_STRING(u) + left,
3514 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003515 PyString_GET_SIZE(self));
3516 if (right)
3517 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3518 fill, right);
3519 }
3520
3521 return u;
3522}
3523
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003524PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003525"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003526"\n"
3527"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003528"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529
3530static PyObject *
3531string_ljust(PyStringObject *self, PyObject *args)
3532{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003533 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003534 char fillchar = ' ';
3535
Thomas Wouters4abb3662006-04-19 14:50:15 +00003536 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537 return NULL;
3538
Tim Peters8fa5dd02001-09-12 02:18:30 +00003539 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003540 Py_INCREF(self);
3541 return (PyObject*) self;
3542 }
3543
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003544 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545}
3546
3547
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003548PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003549"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003550"\n"
3551"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003552"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003553
3554static PyObject *
3555string_rjust(PyStringObject *self, PyObject *args)
3556{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003557 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003558 char fillchar = ' ';
3559
Thomas Wouters4abb3662006-04-19 14:50:15 +00003560 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561 return NULL;
3562
Tim Peters8fa5dd02001-09-12 02:18:30 +00003563 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003564 Py_INCREF(self);
3565 return (PyObject*) self;
3566 }
3567
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003568 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003569}
3570
3571
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003572PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003573"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003574"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003575"Return S centered in a string of length width. Padding is\n"
3576"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003577
3578static PyObject *
3579string_center(PyStringObject *self, PyObject *args)
3580{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003581 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003582 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003583 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003584
Thomas Wouters4abb3662006-04-19 14:50:15 +00003585 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586 return NULL;
3587
Tim Peters8fa5dd02001-09-12 02:18:30 +00003588 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589 Py_INCREF(self);
3590 return (PyObject*) self;
3591 }
3592
3593 marg = width - PyString_GET_SIZE(self);
3594 left = marg / 2 + (marg & width & 1);
3595
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003596 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003597}
3598
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003599PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003600"S.zfill(width) -> string\n"
3601"\n"
3602"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003603"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003604
3605static PyObject *
3606string_zfill(PyStringObject *self, PyObject *args)
3607{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003608 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003609 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003610 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003611 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003612
Thomas Wouters4abb3662006-04-19 14:50:15 +00003613 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003614 return NULL;
3615
3616 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003617 if (PyString_CheckExact(self)) {
3618 Py_INCREF(self);
3619 return (PyObject*) self;
3620 }
3621 else
3622 return PyString_FromStringAndSize(
3623 PyString_AS_STRING(self),
3624 PyString_GET_SIZE(self)
3625 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003626 }
3627
3628 fill = width - PyString_GET_SIZE(self);
3629
3630 s = pad(self, fill, 0, '0');
3631
3632 if (s == NULL)
3633 return NULL;
3634
3635 p = PyString_AS_STRING(s);
3636 if (p[fill] == '+' || p[fill] == '-') {
3637 /* move sign to beginning of string */
3638 p[0] = p[fill];
3639 p[fill] = '0';
3640 }
3641
3642 return (PyObject*) s;
3643}
3644
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003645PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003646"S.isspace() -> bool\n\
3647\n\
3648Return True if all characters in S are whitespace\n\
3649and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650
3651static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003652string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653{
Fred Drakeba096332000-07-09 07:04:36 +00003654 register const unsigned char *p
3655 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003656 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658 /* Shortcut for single character strings */
3659 if (PyString_GET_SIZE(self) == 1 &&
3660 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003662
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003663 /* Special case for empty strings */
3664 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003665 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003666
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667 e = p + PyString_GET_SIZE(self);
3668 for (; p < e; p++) {
3669 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673}
3674
3675
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003676PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003678\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003679Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003680and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003681
3682static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003683string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003684{
Fred Drakeba096332000-07-09 07:04:36 +00003685 register const unsigned char *p
3686 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003687 register const unsigned char *e;
3688
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003689 /* Shortcut for single character strings */
3690 if (PyString_GET_SIZE(self) == 1 &&
3691 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003693
3694 /* Special case for empty strings */
3695 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003696 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003697
3698 e = p + PyString_GET_SIZE(self);
3699 for (; p < e; p++) {
3700 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003702 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003703 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003704}
3705
3706
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003707PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003709\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003710Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003711and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003712
3713static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003714string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003715{
Fred Drakeba096332000-07-09 07:04:36 +00003716 register const unsigned char *p
3717 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003718 register const unsigned char *e;
3719
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003720 /* Shortcut for single character strings */
3721 if (PyString_GET_SIZE(self) == 1 &&
3722 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003724
3725 /* Special case for empty strings */
3726 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003728
3729 e = p + PyString_GET_SIZE(self);
3730 for (; p < e; p++) {
3731 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003732 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003733 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003734 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003735}
3736
3737
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003738PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003739"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003741Return True if all characters in S are digits\n\
3742and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743
3744static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003745string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746{
Fred Drakeba096332000-07-09 07:04:36 +00003747 register const unsigned char *p
3748 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003749 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003750
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751 /* Shortcut for single character strings */
3752 if (PyString_GET_SIZE(self) == 1 &&
3753 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003756 /* Special case for empty strings */
3757 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003758 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003759
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 e = p + PyString_GET_SIZE(self);
3761 for (; p < e; p++) {
3762 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003763 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003765 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766}
3767
3768
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003769PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003770"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003772Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003773at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774
3775static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003776string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777{
Fred Drakeba096332000-07-09 07:04:36 +00003778 register const unsigned char *p
3779 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003780 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003781 int cased;
3782
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783 /* Shortcut for single character strings */
3784 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003785 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003787 /* Special case for empty strings */
3788 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003789 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003790
Guido van Rossum4c08d552000-03-10 22:55:18 +00003791 e = p + PyString_GET_SIZE(self);
3792 cased = 0;
3793 for (; p < e; p++) {
3794 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003795 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003796 else if (!cased && islower(*p))
3797 cased = 1;
3798 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003799 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800}
3801
3802
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003803PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003804"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003806Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003807at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808
3809static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003810string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811{
Fred Drakeba096332000-07-09 07:04:36 +00003812 register const unsigned char *p
3813 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003814 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003815 int cased;
3816
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817 /* Shortcut for single character strings */
3818 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003819 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003821 /* Special case for empty strings */
3822 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003823 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003824
Guido van Rossum4c08d552000-03-10 22:55:18 +00003825 e = p + PyString_GET_SIZE(self);
3826 cased = 0;
3827 for (; p < e; p++) {
3828 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003829 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830 else if (!cased && isupper(*p))
3831 cased = 1;
3832 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003833 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003834}
3835
3836
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003837PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003838"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003839\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003840Return True if S is a titlecased string and there is at least one\n\
3841character in S, i.e. uppercase characters may only follow uncased\n\
3842characters and lowercase characters only cased ones. Return False\n\
3843otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003844
3845static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003846string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003847{
Fred Drakeba096332000-07-09 07:04:36 +00003848 register const unsigned char *p
3849 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003850 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003851 int cased, previous_is_cased;
3852
Guido van Rossum4c08d552000-03-10 22:55:18 +00003853 /* Shortcut for single character strings */
3854 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003855 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003856
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003857 /* Special case for empty strings */
3858 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003859 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003860
Guido van Rossum4c08d552000-03-10 22:55:18 +00003861 e = p + PyString_GET_SIZE(self);
3862 cased = 0;
3863 previous_is_cased = 0;
3864 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003865 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003866
3867 if (isupper(ch)) {
3868 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003869 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003870 previous_is_cased = 1;
3871 cased = 1;
3872 }
3873 else if (islower(ch)) {
3874 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003875 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003876 previous_is_cased = 1;
3877 cased = 1;
3878 }
3879 else
3880 previous_is_cased = 0;
3881 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003882 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003883}
3884
3885
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003886PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003887"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003888\n\
3889Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003890Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003891is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003892
Guido van Rossum4c08d552000-03-10 22:55:18 +00003893static PyObject*
3894string_splitlines(PyStringObject *self, PyObject *args)
3895{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003896 register Py_ssize_t i;
3897 register Py_ssize_t j;
3898 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003899 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003900 PyObject *list;
3901 PyObject *str;
3902 char *data;
3903
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003904 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003905 return NULL;
3906
3907 data = PyString_AS_STRING(self);
3908 len = PyString_GET_SIZE(self);
3909
Guido van Rossum4c08d552000-03-10 22:55:18 +00003910 list = PyList_New(0);
3911 if (!list)
3912 goto onError;
3913
3914 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003915 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003916
Guido van Rossum4c08d552000-03-10 22:55:18 +00003917 /* Find a line and append it */
3918 while (i < len && data[i] != '\n' && data[i] != '\r')
3919 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003920
3921 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003922 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003923 if (i < len) {
3924 if (data[i] == '\r' && i + 1 < len &&
3925 data[i+1] == '\n')
3926 i += 2;
3927 else
3928 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003929 if (keepends)
3930 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003931 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003932 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003933 j = i;
3934 }
3935 if (j < len) {
3936 SPLIT_APPEND(data, j, len);
3937 }
3938
3939 return list;
3940
3941 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003942 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003943 return NULL;
3944}
3945
3946#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003947#undef SPLIT_ADD
3948#undef MAX_PREALLOC
3949#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003950
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003951static PyObject *
3952string_getnewargs(PyStringObject *v)
3953{
3954 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3955}
3956
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003957
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003958static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003959string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003960 /* Counterparts of the obsolete stropmodule functions; except
3961 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003962 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3963 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003964 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003965 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3966 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003967 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3968 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3969 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3970 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3971 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3972 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3973 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003974 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3975 capitalize__doc__},
3976 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3977 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3978 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003979 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003980 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3981 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3982 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3983 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3984 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3985 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3986 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3987 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3988 startswith__doc__},
3989 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3990 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3991 swapcase__doc__},
3992 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3993 translate__doc__},
3994 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3995 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3996 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3997 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3998 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3999 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4000 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4001 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4002 expandtabs__doc__},
4003 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4004 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00004005 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004006 {NULL, NULL} /* sentinel */
4007};
4008
Jeremy Hylton938ace62002-07-17 16:30:39 +00004009static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00004010str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4011
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004012static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004013string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004014{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004015 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004016 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004017
Guido van Rossumae960af2001-08-30 03:11:59 +00004018 if (type != &PyString_Type)
4019 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004020 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4021 return NULL;
4022 if (x == NULL)
4023 return PyString_FromString("");
4024 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004025}
4026
Guido van Rossumae960af2001-08-30 03:11:59 +00004027static PyObject *
4028str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4029{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004030 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004031 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004032
4033 assert(PyType_IsSubtype(type, &PyString_Type));
4034 tmp = string_new(&PyString_Type, args, kwds);
4035 if (tmp == NULL)
4036 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004037 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004038 n = PyString_GET_SIZE(tmp);
4039 pnew = type->tp_alloc(type, n);
4040 if (pnew != NULL) {
4041 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004042 ((PyStringObject *)pnew)->ob_shash =
4043 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004044 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004045 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004046 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004047 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004048}
4049
Guido van Rossumcacfc072002-05-24 19:01:59 +00004050static PyObject *
4051basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4052{
4053 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004054 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004055 return NULL;
4056}
4057
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004058static PyObject *
4059string_mod(PyObject *v, PyObject *w)
4060{
4061 if (!PyString_Check(v)) {
4062 Py_INCREF(Py_NotImplemented);
4063 return Py_NotImplemented;
4064 }
4065 return PyString_Format(v, w);
4066}
4067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004068PyDoc_STRVAR(basestring_doc,
4069"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004070
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004071static PyNumberMethods string_as_number = {
4072 0, /*nb_add*/
4073 0, /*nb_subtract*/
4074 0, /*nb_multiply*/
4075 0, /*nb_divide*/
4076 string_mod, /*nb_remainder*/
4077};
4078
4079
Guido van Rossumcacfc072002-05-24 19:01:59 +00004080PyTypeObject PyBaseString_Type = {
4081 PyObject_HEAD_INIT(&PyType_Type)
4082 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004083 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004084 0,
4085 0,
4086 0, /* tp_dealloc */
4087 0, /* tp_print */
4088 0, /* tp_getattr */
4089 0, /* tp_setattr */
4090 0, /* tp_compare */
4091 0, /* tp_repr */
4092 0, /* tp_as_number */
4093 0, /* tp_as_sequence */
4094 0, /* tp_as_mapping */
4095 0, /* tp_hash */
4096 0, /* tp_call */
4097 0, /* tp_str */
4098 0, /* tp_getattro */
4099 0, /* tp_setattro */
4100 0, /* tp_as_buffer */
4101 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4102 basestring_doc, /* tp_doc */
4103 0, /* tp_traverse */
4104 0, /* tp_clear */
4105 0, /* tp_richcompare */
4106 0, /* tp_weaklistoffset */
4107 0, /* tp_iter */
4108 0, /* tp_iternext */
4109 0, /* tp_methods */
4110 0, /* tp_members */
4111 0, /* tp_getset */
4112 &PyBaseObject_Type, /* tp_base */
4113 0, /* tp_dict */
4114 0, /* tp_descr_get */
4115 0, /* tp_descr_set */
4116 0, /* tp_dictoffset */
4117 0, /* tp_init */
4118 0, /* tp_alloc */
4119 basestring_new, /* tp_new */
4120 0, /* tp_free */
4121};
4122
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004123PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004124"str(object) -> string\n\
4125\n\
4126Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004127If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004128
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004129PyTypeObject PyString_Type = {
4130 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004131 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004132 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004133 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004134 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004135 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004136 (printfunc)string_print, /* tp_print */
4137 0, /* tp_getattr */
4138 0, /* tp_setattr */
4139 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004140 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004141 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004142 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004143 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004144 (hashfunc)string_hash, /* tp_hash */
4145 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004146 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004147 PyObject_GenericGetAttr, /* tp_getattro */
4148 0, /* tp_setattro */
4149 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004150 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004151 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004152 string_doc, /* tp_doc */
4153 0, /* tp_traverse */
4154 0, /* tp_clear */
4155 (richcmpfunc)string_richcompare, /* tp_richcompare */
4156 0, /* tp_weaklistoffset */
4157 0, /* tp_iter */
4158 0, /* tp_iternext */
4159 string_methods, /* tp_methods */
4160 0, /* tp_members */
4161 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004162 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004163 0, /* tp_dict */
4164 0, /* tp_descr_get */
4165 0, /* tp_descr_set */
4166 0, /* tp_dictoffset */
4167 0, /* tp_init */
4168 0, /* tp_alloc */
4169 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004170 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004171};
4172
4173void
Fred Drakeba096332000-07-09 07:04:36 +00004174PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004175{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004176 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004177 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004178 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004179 if (w == NULL || !PyString_Check(*pv)) {
4180 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004181 *pv = NULL;
4182 return;
4183 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004184 v = string_concat((PyStringObject *) *pv, w);
4185 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004186 *pv = v;
4187}
4188
Guido van Rossum013142a1994-08-30 08:19:36 +00004189void
Fred Drakeba096332000-07-09 07:04:36 +00004190PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004191{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004192 PyString_Concat(pv, w);
4193 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004194}
4195
4196
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004197/* The following function breaks the notion that strings are immutable:
4198 it changes the size of a string. We get away with this only if there
4199 is only one module referencing the object. You can also think of it
4200 as creating a new string object and destroying the old one, only
4201 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004202 already be known to some other part of the code...
4203 Note that if there's not enough memory to resize the string, the original
4204 string object at *pv is deallocated, *pv is set to NULL, an "out of
4205 memory" exception is set, and -1 is returned. Else (on success) 0 is
4206 returned, and the value in *pv may or may not be the same as on input.
4207 As always, an extra byte is allocated for a trailing \0 byte (newsize
4208 does *not* include that), and a trailing \0 byte is stored.
4209*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004210
4211int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004212_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004213{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004214 register PyObject *v;
4215 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004216 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004217 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4218 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004219 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004220 Py_DECREF(v);
4221 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004222 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004223 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004224 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004225 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004226 _Py_ForgetReference(v);
4227 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004228 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004229 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004230 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004231 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004232 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004233 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004234 _Py_NewReference(*pv);
4235 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004236 sv->ob_size = newsize;
4237 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004238 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004239 return 0;
4240}
Guido van Rossume5372401993-03-16 12:15:04 +00004241
4242/* Helpers for formatstring */
4243
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004244static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004245getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004246{
Thomas Wouters977485d2006-02-16 15:59:12 +00004247 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004248 if (argidx < arglen) {
4249 (*p_argidx)++;
4250 if (arglen < 0)
4251 return args;
4252 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004253 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004254 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004255 PyErr_SetString(PyExc_TypeError,
4256 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004257 return NULL;
4258}
4259
/* Format flag bits, one per flag character of a conversion spec */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
4272
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004273static int
Fred Drakeba096332000-07-09 07:04:36 +00004274formatfloat(char *buf, size_t buflen, int flags,
4275 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004276{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004277 /* fmt = '%#.' + `prec` + `type`
4278 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004279 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004280 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004281 x = PyFloat_AsDouble(v);
4282 if (x == -1.0 && PyErr_Occurred()) {
4283 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004284 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004285 }
Guido van Rossume5372401993-03-16 12:15:04 +00004286 if (prec < 0)
4287 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004288 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4289 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004290 /* Worst case length calc to ensure no buffer overrun:
4291
4292 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004293 fmt = %#.<prec>g
4294 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004295 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004296 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004297
4298 'f' formats:
4299 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4300 len = 1 + 50 + 1 + prec = 52 + prec
4301
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004302 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004303 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004304
4305 */
4306 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4307 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004308 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004309 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004310 return -1;
4311 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004312 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4313 (flags&F_ALT) ? "#" : "",
4314 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004315 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004316 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004317}
4318
Tim Peters38fd5b62000-09-21 05:43:11 +00004319/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4320 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4321 * Python's regular ints.
4322 * Return value: a new PyString*, or NULL if error.
4323 * . *pbuf is set to point into it,
4324 * *plen set to the # of chars following that.
4325 * Caller must decref it when done using pbuf.
4326 * The string starting at *pbuf is of the form
4327 * "-"? ("0x" | "0X")? digit+
4328 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004329 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004330 * There will be at least prec digits, zero-filled on the left if
4331 * necessary to get that many.
4332 * val object to be converted
4333 * flags bitmask of format flags; only F_ALT is looked at
4334 * prec minimum number of digits; 0-fill on left if needed
4335 * type a character in [duoxX]; u acts the same as d
4336 *
4337 * CAUTION: o, x and X conversions on regular ints can never
4338 * produce a '-' sign, but can for Python's unbounded ints.
4339 */
4340PyObject*
4341_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4342 char **pbuf, int *plen)
4343{
4344 PyObject *result = NULL;
4345 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004346 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004347 int sign; /* 1 if '-', else 0 */
4348 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004349 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004350 int numdigits; /* len == numnondigits + numdigits */
4351 int numnondigits = 0;
4352
4353 switch (type) {
4354 case 'd':
4355 case 'u':
4356 result = val->ob_type->tp_str(val);
4357 break;
4358 case 'o':
4359 result = val->ob_type->tp_as_number->nb_oct(val);
4360 break;
4361 case 'x':
4362 case 'X':
4363 numnondigits = 2;
4364 result = val->ob_type->tp_as_number->nb_hex(val);
4365 break;
4366 default:
4367 assert(!"'type' not in [duoxX]");
4368 }
4369 if (!result)
4370 return NULL;
4371
4372 /* To modify the string in-place, there can only be one reference. */
4373 if (result->ob_refcnt != 1) {
4374 PyErr_BadInternalCall();
4375 return NULL;
4376 }
4377 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004378 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004379 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004380 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4381 return NULL;
4382 }
4383 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004384 if (buf[len-1] == 'L') {
4385 --len;
4386 buf[len] = '\0';
4387 }
4388 sign = buf[0] == '-';
4389 numnondigits += sign;
4390 numdigits = len - numnondigits;
4391 assert(numdigits > 0);
4392
Tim Petersfff53252001-04-12 18:38:48 +00004393 /* Get rid of base marker unless F_ALT */
4394 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004395 /* Need to skip 0x, 0X or 0. */
4396 int skipped = 0;
4397 switch (type) {
4398 case 'o':
4399 assert(buf[sign] == '0');
4400 /* If 0 is only digit, leave it alone. */
4401 if (numdigits > 1) {
4402 skipped = 1;
4403 --numdigits;
4404 }
4405 break;
4406 case 'x':
4407 case 'X':
4408 assert(buf[sign] == '0');
4409 assert(buf[sign + 1] == 'x');
4410 skipped = 2;
4411 numnondigits -= 2;
4412 break;
4413 }
4414 if (skipped) {
4415 buf += skipped;
4416 len -= skipped;
4417 if (sign)
4418 buf[0] = '-';
4419 }
4420 assert(len == numnondigits + numdigits);
4421 assert(numdigits > 0);
4422 }
4423
4424 /* Fill with leading zeroes to meet minimum width. */
4425 if (prec > numdigits) {
4426 PyObject *r1 = PyString_FromStringAndSize(NULL,
4427 numnondigits + prec);
4428 char *b1;
4429 if (!r1) {
4430 Py_DECREF(result);
4431 return NULL;
4432 }
4433 b1 = PyString_AS_STRING(r1);
4434 for (i = 0; i < numnondigits; ++i)
4435 *b1++ = *buf++;
4436 for (i = 0; i < prec - numdigits; i++)
4437 *b1++ = '0';
4438 for (i = 0; i < numdigits; i++)
4439 *b1++ = *buf++;
4440 *b1 = '\0';
4441 Py_DECREF(result);
4442 result = r1;
4443 buf = PyString_AS_STRING(result);
4444 len = numnondigits + prec;
4445 }
4446
4447 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004448 if (type == 'X') {
4449 /* Need to convert all lower case letters to upper case.
4450 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004451 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004452 if (buf[i] >= 'a' && buf[i] <= 'x')
4453 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004454 }
4455 *pbuf = buf;
4456 *plen = len;
4457 return result;
4458}
4459
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004460static int
Fred Drakeba096332000-07-09 07:04:36 +00004461formatint(char *buf, size_t buflen, int flags,
4462 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004463{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004464 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004465 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4466 + 1 + 1 = 24 */
4467 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004468 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004469 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004470
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004471 x = PyInt_AsLong(v);
4472 if (x == -1 && PyErr_Occurred()) {
4473 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004474 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004475 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004476 if (x < 0 && type == 'u') {
4477 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004478 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004479 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4480 sign = "-";
4481 else
4482 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004483 if (prec < 0)
4484 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004485
4486 if ((flags & F_ALT) &&
4487 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004488 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004489 * of issues that cause pain:
4490 * - when 0 is being converted, the C standard leaves off
4491 * the '0x' or '0X', which is inconsistent with other
4492 * %#x/%#X conversions and inconsistent with Python's
4493 * hex() function
4494 * - there are platforms that violate the standard and
4495 * convert 0 with the '0x' or '0X'
4496 * (Metrowerks, Compaq Tru64)
4497 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004498 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004499 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004500 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004501 * We can achieve the desired consistency by inserting our
4502 * own '0x' or '0X' prefix, and substituting %x/%X in place
4503 * of %#x/%#X.
4504 *
4505 * Note that this is the same approach as used in
4506 * formatint() in unicodeobject.c
4507 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004508 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4509 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004510 }
4511 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004512 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4513 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004514 prec, type);
4515 }
4516
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004517 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4518 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004519 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004520 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004521 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004522 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004523 return -1;
4524 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004525 if (sign[0])
4526 PyOS_snprintf(buf, buflen, fmt, -x);
4527 else
4528 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004529 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004530}
4531
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004532static int
Fred Drakeba096332000-07-09 07:04:36 +00004533formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004534{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004535 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004536 if (PyString_Check(v)) {
4537 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004538 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004539 }
4540 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004541 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004542 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004543 }
4544 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004545 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004546}
4547
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesized so the cast stays attached to the
   constant wherever the macro is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004557
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004558PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004559PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004560{
4561 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004562 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004563 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004564 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004565 PyObject *result, *orig_args;
4566#ifdef Py_USING_UNICODE
4567 PyObject *v, *w;
4568#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004569 PyObject *dict = NULL;
4570 if (format == NULL || !PyString_Check(format) || args == NULL) {
4571 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004572 return NULL;
4573 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004574 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004575 fmt = PyString_AS_STRING(format);
4576 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004577 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004578 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004579 if (result == NULL)
4580 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004581 res = PyString_AsString(result);
4582 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004583 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004584 argidx = 0;
4585 }
4586 else {
4587 arglen = -1;
4588 argidx = -2;
4589 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004590 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4591 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004592 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004593 while (--fmtcnt >= 0) {
4594 if (*fmt != '%') {
4595 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004596 rescnt = fmtcnt + 100;
4597 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004598 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004599 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004600 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004601 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004602 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004603 }
4604 *res++ = *fmt++;
4605 }
4606 else {
4607 /* Got a format specifier */
4608 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004609 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004610 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004611 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004612 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004613 PyObject *v = NULL;
4614 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004615 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004616 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004617 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004618 char formatbuf[FORMATBUFLEN];
4619 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004620#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004621 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004622 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004623#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004624
Guido van Rossumda9c2711996-12-05 21:58:58 +00004625 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004626 if (*fmt == '(') {
4627 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004628 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004629 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004630 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004631
4632 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004634 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004635 goto error;
4636 }
4637 ++fmt;
4638 --fmtcnt;
4639 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004640 /* Skip over balanced parentheses */
4641 while (pcount > 0 && --fmtcnt >= 0) {
4642 if (*fmt == ')')
4643 --pcount;
4644 else if (*fmt == '(')
4645 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004646 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004647 }
4648 keylen = fmt - keystart - 1;
4649 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004650 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004651 "incomplete format key");
4652 goto error;
4653 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004654 key = PyString_FromStringAndSize(keystart,
4655 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004656 if (key == NULL)
4657 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004658 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004659 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004660 args_owned = 0;
4661 }
4662 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004663 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004664 if (args == NULL) {
4665 goto error;
4666 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004667 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004668 arglen = -1;
4669 argidx = -2;
4670 }
Guido van Rossume5372401993-03-16 12:15:04 +00004671 while (--fmtcnt >= 0) {
4672 switch (c = *fmt++) {
4673 case '-': flags |= F_LJUST; continue;
4674 case '+': flags |= F_SIGN; continue;
4675 case ' ': flags |= F_BLANK; continue;
4676 case '#': flags |= F_ALT; continue;
4677 case '0': flags |= F_ZERO; continue;
4678 }
4679 break;
4680 }
4681 if (c == '*') {
4682 v = getnextarg(args, arglen, &argidx);
4683 if (v == NULL)
4684 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004685 if (!PyInt_Check(v)) {
4686 PyErr_SetString(PyExc_TypeError,
4687 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004688 goto error;
4689 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004690 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004691 if (width < 0) {
4692 flags |= F_LJUST;
4693 width = -width;
4694 }
Guido van Rossume5372401993-03-16 12:15:04 +00004695 if (--fmtcnt >= 0)
4696 c = *fmt++;
4697 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004698 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004699 width = c - '0';
4700 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004701 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004702 if (!isdigit(c))
4703 break;
4704 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004705 PyErr_SetString(
4706 PyExc_ValueError,
4707 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004708 goto error;
4709 }
4710 width = width*10 + (c - '0');
4711 }
4712 }
4713 if (c == '.') {
4714 prec = 0;
4715 if (--fmtcnt >= 0)
4716 c = *fmt++;
4717 if (c == '*') {
4718 v = getnextarg(args, arglen, &argidx);
4719 if (v == NULL)
4720 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004721 if (!PyInt_Check(v)) {
4722 PyErr_SetString(
4723 PyExc_TypeError,
4724 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004725 goto error;
4726 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004727 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004728 if (prec < 0)
4729 prec = 0;
4730 if (--fmtcnt >= 0)
4731 c = *fmt++;
4732 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004733 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004734 prec = c - '0';
4735 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004736 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004737 if (!isdigit(c))
4738 break;
4739 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004740 PyErr_SetString(
4741 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004742 "prec too big");
4743 goto error;
4744 }
4745 prec = prec*10 + (c - '0');
4746 }
4747 }
4748 } /* prec */
4749 if (fmtcnt >= 0) {
4750 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004751 if (--fmtcnt >= 0)
4752 c = *fmt++;
4753 }
4754 }
4755 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004756 PyErr_SetString(PyExc_ValueError,
4757 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004758 goto error;
4759 }
4760 if (c != '%') {
4761 v = getnextarg(args, arglen, &argidx);
4762 if (v == NULL)
4763 goto error;
4764 }
4765 sign = 0;
4766 fill = ' ';
4767 switch (c) {
4768 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004769 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004770 len = 1;
4771 break;
4772 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004773#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004774 if (PyUnicode_Check(v)) {
4775 fmt = fmt_start;
4776 argidx = argidx_start;
4777 goto unicode;
4778 }
Georg Brandld45014b2005-10-01 17:06:00 +00004779#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004780 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004781#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004782 if (temp != NULL && PyUnicode_Check(temp)) {
4783 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004784 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004785 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004786 goto unicode;
4787 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004788#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004789 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004790 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004791 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004792 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004793 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004794 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004795 if (!PyString_Check(temp)) {
4796 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004797 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004798 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004799 goto error;
4800 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004801 pbuf = PyString_AS_STRING(temp);
4802 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004803 if (prec >= 0 && len > prec)
4804 len = prec;
4805 break;
4806 case 'i':
4807 case 'd':
4808 case 'u':
4809 case 'o':
4810 case 'x':
4811 case 'X':
4812 if (c == 'i')
4813 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004814 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004815 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004816 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004817 prec, c, &pbuf, &ilen);
4818 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004819 if (!temp)
4820 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004821 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004822 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004823 else {
4824 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004825 len = formatint(pbuf,
4826 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004827 flags, prec, c, v);
4828 if (len < 0)
4829 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004830 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004831 }
4832 if (flags & F_ZERO)
4833 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004834 break;
4835 case 'e':
4836 case 'E':
4837 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004838 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004839 case 'g':
4840 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004841 if (c == 'F')
4842 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004843 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004844 len = formatfloat(pbuf, sizeof(formatbuf),
4845 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004846 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004847 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004848 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004849 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004850 fill = '0';
4851 break;
4852 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004853#ifdef Py_USING_UNICODE
4854 if (PyUnicode_Check(v)) {
4855 fmt = fmt_start;
4856 argidx = argidx_start;
4857 goto unicode;
4858 }
4859#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004860 pbuf = formatbuf;
4861 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004862 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004863 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004864 break;
4865 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004866 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004867 "unsupported format character '%c' (0x%x) "
4868 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004869 c, c,
4870 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004871 goto error;
4872 }
4873 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004874 if (*pbuf == '-' || *pbuf == '+') {
4875 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004876 len--;
4877 }
4878 else if (flags & F_SIGN)
4879 sign = '+';
4880 else if (flags & F_BLANK)
4881 sign = ' ';
4882 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004883 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004884 }
4885 if (width < len)
4886 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004887 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004888 reslen -= rescnt;
4889 rescnt = width + fmtcnt + 100;
4890 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004891 if (reslen < 0) {
4892 Py_DECREF(result);
4893 return PyErr_NoMemory();
4894 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004895 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004896 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004897 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004898 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004899 }
4900 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004901 if (fill != ' ')
4902 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004903 rescnt--;
4904 if (width > len)
4905 width--;
4906 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004907 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4908 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004909 assert(pbuf[1] == c);
4910 if (fill != ' ') {
4911 *res++ = *pbuf++;
4912 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004913 }
Tim Petersfff53252001-04-12 18:38:48 +00004914 rescnt -= 2;
4915 width -= 2;
4916 if (width < 0)
4917 width = 0;
4918 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004919 }
4920 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004921 do {
4922 --rescnt;
4923 *res++ = fill;
4924 } while (--width > len);
4925 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004926 if (fill == ' ') {
4927 if (sign)
4928 *res++ = sign;
4929 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004930 (c == 'x' || c == 'X')) {
4931 assert(pbuf[0] == '0');
4932 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004933 *res++ = *pbuf++;
4934 *res++ = *pbuf++;
4935 }
4936 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004937 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004938 res += len;
4939 rescnt -= len;
4940 while (--width >= len) {
4941 --rescnt;
4942 *res++ = ' ';
4943 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004944 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004945 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004946 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004947 goto error;
4948 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004949 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004950 } /* '%' */
4951 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004952 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004953 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004954 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004955 goto error;
4956 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004957 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004958 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004959 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004960 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004961 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004962
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004963#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004964 unicode:
4965 if (args_owned) {
4966 Py_DECREF(args);
4967 args_owned = 0;
4968 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004969 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004970 if (PyTuple_Check(orig_args) && argidx > 0) {
4971 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004972 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004973 v = PyTuple_New(n);
4974 if (v == NULL)
4975 goto error;
4976 while (--n >= 0) {
4977 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4978 Py_INCREF(w);
4979 PyTuple_SET_ITEM(v, n, w);
4980 }
4981 args = v;
4982 } else {
4983 Py_INCREF(orig_args);
4984 args = orig_args;
4985 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004986 args_owned = 1;
4987 /* Take what we have of the result and let the Unicode formatting
4988 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004989 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004990 if (_PyString_Resize(&result, rescnt))
4991 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004992 fmtcnt = PyString_GET_SIZE(format) - \
4993 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004994 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4995 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004996 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004997 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004998 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004999 if (v == NULL)
5000 goto error;
5001 /* Paste what we have (result) to what the Unicode formatting
5002 function returned (v) and return the result (or error) */
5003 w = PyUnicode_Concat(result, v);
5004 Py_DECREF(result);
5005 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005006 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005007 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005008#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005009
Guido van Rossume5372401993-03-16 12:15:04 +00005010 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005011 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005012 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005013 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005014 }
Guido van Rossume5372401993-03-16 12:15:04 +00005015 return NULL;
5016}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005017
Guido van Rossum2a61e741997-01-18 07:55:05 +00005018void
Fred Drakeba096332000-07-09 07:04:36 +00005019PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005020{
5021 register PyStringObject *s = (PyStringObject *)(*p);
5022 PyObject *t;
5023 if (s == NULL || !PyString_Check(s))
5024 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005025 /* If it's a string subclass, we don't really know what putting
5026 it in the interned dict might do. */
5027 if (!PyString_CheckExact(s))
5028 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005029 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005030 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005031 if (interned == NULL) {
5032 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005033 if (interned == NULL) {
5034 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005035 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005036 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005037 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005038 t = PyDict_GetItem(interned, (PyObject *)s);
5039 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005040 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005041 Py_DECREF(*p);
5042 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005043 return;
5044 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005045
Armin Rigo79f7ad22004-08-07 19:27:39 +00005046 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005047 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005048 return;
5049 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005050 /* The two references in interned are not counted by refcnt.
5051 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00005052 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005053 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005054}
5055
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005056void
5057PyString_InternImmortal(PyObject **p)
5058{
5059 PyString_InternInPlace(p);
5060 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5061 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5062 Py_INCREF(*p);
5063 }
5064}
5065
Guido van Rossum2a61e741997-01-18 07:55:05 +00005066
5067PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005068PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005069{
5070 PyObject *s = PyString_FromString(cp);
5071 if (s == NULL)
5072 return NULL;
5073 PyString_InternInPlace(&s);
5074 return s;
5075}
5076
Guido van Rossum8cf04761997-08-02 02:57:45 +00005077void
Fred Drakeba096332000-07-09 07:04:36 +00005078PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005079{
5080 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005081 for (i = 0; i < UCHAR_MAX + 1; i++) {
5082 Py_XDECREF(characters[i]);
5083 characters[i] = NULL;
5084 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005085 Py_XDECREF(nullstring);
5086 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005087}
Barry Warsawa903ad982001-02-23 16:40:48 +00005088
Barry Warsawa903ad982001-02-23 16:40:48 +00005089void _Py_ReleaseInternedStrings(void)
5090{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005091 PyObject *keys;
5092 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005093 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005094
5095 if (interned == NULL || !PyDict_Check(interned))
5096 return;
5097 keys = PyDict_Keys(interned);
5098 if (keys == NULL || !PyList_Check(keys)) {
5099 PyErr_Clear();
5100 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005101 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005102
5103 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5104 detector, interned strings are not forcibly deallocated; rather, we
5105 give them their stolen references back, and then clear and DECREF
5106 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005107
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005108 fprintf(stderr, "releasing interned strings\n");
5109 n = PyList_GET_SIZE(keys);
5110 for (i = 0; i < n; i++) {
5111 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5112 switch (s->ob_sstate) {
5113 case SSTATE_NOT_INTERNED:
5114 /* XXX Shouldn't happen */
5115 break;
5116 case SSTATE_INTERNED_IMMORTAL:
5117 s->ob_refcnt += 1;
5118 break;
5119 case SSTATE_INTERNED_MORTAL:
5120 s->ob_refcnt += 2;
5121 break;
5122 default:
5123 Py_FatalError("Inconsistent interned string state.");
5124 }
5125 s->ob_sstate = SSTATE_NOT_INTERNED;
5126 }
5127 Py_DECREF(keys);
5128 PyDict_Clear(interned);
5129 Py_DECREF(interned);
5130 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005131}