blob: 70a2e7710a0c5103e138026f4c9415b3c13e6844 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossum45ec02a2002-08-19 21:43:18 +000015/* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
19
Tim Petersae1d0c92006-03-17 03:29:34 +000020 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000021 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
22*/
23static PyObject *interned;
24
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), `size'
   denotes the number of characters to allocate, not counting any null
   terminating character.  PyString_FromStringAndSize() takes it as a
   parameter; PyString_FromString() computes it from `str' with strlen().

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is either NULL or
   else points to a string containing at least `size' bytes.  The string in
   the `str' parameter does not have to be null-terminated.  (Therefore it is
   safe to construct a substring by calling
   `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
82 memcpy(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000136 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
164 memcpy(count, vargs, sizeof(va_list));
165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
306 memcpy(s, p, i);
307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 memcpy(p, r, rn);
586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Martin v. Löwis18e16552006-02-15 17:27:45 +0000692static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693string_getsize(register PyObject *op)
694{
695 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 if (PyString_AsStringAndSize(op, &s, &len))
698 return -1;
699 return len;
700}
701
702static /*const*/ char *
703string_getbuffer(register PyObject *op)
704{
705 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000706 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (PyString_AsStringAndSize(op, &s, &len))
708 return NULL;
709 return s;
710}
711
Martin v. Löwis18e16552006-02-15 17:27:45 +0000712Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000713PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000714{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715 if (!PyString_Check(op))
716 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000717 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718}
719
720/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000721PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723 if (!PyString_Check(op))
724 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726}
727
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728int
729PyString_AsStringAndSize(register PyObject *obj,
730 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000731 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732{
733 if (s == NULL) {
734 PyErr_BadInternalCall();
735 return -1;
736 }
737
738 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000739#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740 if (PyUnicode_Check(obj)) {
741 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
742 if (obj == NULL)
743 return -1;
744 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000745 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000746#endif
747 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000748 PyErr_Format(PyExc_TypeError,
749 "expected string or Unicode object, "
750 "%.200s found", obj->ob_type->tp_name);
751 return -1;
752 }
753 }
754
755 *s = PyString_AS_STRING(obj);
756 if (len != NULL)
757 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000758 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000759 PyErr_SetString(PyExc_TypeError,
760 "expected string without null bytes");
761 return -1;
762 }
763 return 0;
764}
765
Fredrik Lundhaf722372006-05-25 17:55:31 +0000766/* -------------------------------------------------------------------- */
767/* Helpers */
768
769#define USE_FAST /* experimental fast search implementation */
770
771/* XXX - this code is copied from unicodeobject.c. we really should
772 refactor the core implementations (see _sre.c for how this can be
773 done), but that'll have to wait -- fredrik */
774
775/* fast search/count implementation, based on a mix between boyer-
776 moore and horspool, with a few more bells and whistles on the top.
777 for some more background, see: http://effbot.org/stringlib */
778
779/* note: fastsearch may access s[n], which isn't a problem when using
780 Python's ordinary string types, but may cause problems if you're
781 using this code in other contexts. also, the count mode returns -1
Andrew M. Kuchlingf344c942006-05-25 18:11:16 +0000782 if there cannot possibly be a match in the target string, and 0 if
Fredrik Lundhaf722372006-05-25 17:55:31 +0000783 it has actually checked for matches, but didn't find any. callers
784 beware! */
785
#define FAST_COUNT 0
#define FAST_SEARCH 1

/* Search for, or count occurrences of, pattern p[0..m) in s[0..n).

   mode  FAST_COUNT counts non-overlapping matches; any other value
         (normally FAST_SEARCH) returns the index of the first match.

   Returns -1 when the pattern is empty or longer than the target, and in
   search mode when nothing is found.  In count mode a target that was
   actually scanned yields the number of matches, possibly 0.

   The loop may read s[n] (one past the searched range), so the target
   must have a readable byte there, such as a terminating '\0'. */
Py_LOCAL(Py_ssize_t)
fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode)
{
    /* 32-bit membership filter keyed on the low five bits of each pattern
       byte.  Unsigned, with 1UL shifts, so that setting bit 31 is well
       defined. */
    unsigned long mask;
    Py_ssize_t skip, count = 0;
    Py_ssize_t i, j, mlast, w;

    w = n - m;

    if (w < 0)
        return -1;

    /* look for special cases */
    if (m <= 1) {
        if (m <= 0)
            return -1;
        /* use special case for 1-character strings */
        if (mode == FAST_COUNT) {
            for (i = 0; i < n; i++)
                if (s[i] == p[0])
                    count++;
            return count;
        } else {
            for (i = 0; i < n; i++)
                if (s[i] == p[0])
                    return i;
        }
        return -1;
    }

    mlast = m - 1;

    /* create compressed boyer-moore delta 1 table */
    skip = mlast - 1;
    mask = 0;
    /* process pattern[:-1] */
    for (i = 0; i < mlast; i++) {
        mask |= 1UL << (p[i] & 0x1F);
        if (p[i] == p[mlast])
            skip = mlast - i - 1;
    }
    /* process pattern[-1] outside the loop */
    mask |= 1UL << (p[mlast] & 0x1F);

    for (i = 0; i <= w; i++) {
        /* note: using mlast in the skip path slows things down on x86 */
        if (s[i+m-1] == p[m-1]) {
            /* candidate match */
            for (j = 0; j < mlast; j++)
                if (s[i+j] != p[j])
                    break;
            if (j == mlast) {
                /* got a match! */
                if (mode != FAST_COUNT)
                    return i;
                count++;
                /* jump past the match so matches never overlap */
                i = i + mlast;
                continue;
            }
            /* miss: check if next character is part of pattern */
            if (!(mask & (1UL << (s[i+m] & 0x1F))))
                i = i + m;
            else
                i = i + skip;
        } else {
            /* skip: check if next character is part of pattern */
            if (!(mask & (1UL << (s[i+m] & 0x1F))))
                i = i + m;
        }
    }

    if (mode != FAST_COUNT)
        return -1;
    return count;
}
863
864/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865/* Methods */
866
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000867static int
Fred Drakeba096332000-07-09 07:04:36 +0000868string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000870 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000872 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000873
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000874 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000875 if (! PyString_CheckExact(op)) {
876 int ret;
877 /* A str subclass may have its own __str__ method. */
878 op = (PyStringObject *) PyObject_Str((PyObject *)op);
879 if (op == NULL)
880 return -1;
881 ret = string_print(op, fp, flags);
882 Py_DECREF(op);
883 return ret;
884 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000885 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000886#ifdef __VMS
887 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
888#else
889 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
890#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000891 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000893
Thomas Wouters7e474022000-07-16 12:04:32 +0000894 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000895 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000896 if (memchr(op->ob_sval, '\'', op->ob_size) &&
897 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000898 quote = '"';
899
900 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 for (i = 0; i < op->ob_size; i++) {
902 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000903 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000904 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000905 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000906 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000907 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000908 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000909 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000913 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000914 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000916 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000917 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000918}
919
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000920PyObject *
921PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000923 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000924 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000925 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000926 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000927 PyErr_SetString(PyExc_OverflowError,
928 "string is too large to make repr");
929 }
930 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000931 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000932 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933 }
934 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 register char c;
937 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000938 int quote;
939
Thomas Wouters7e474022000-07-16 12:04:32 +0000940 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000941 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000942 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000943 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000944 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000945 quote = '"';
946
Tim Peters9161c8b2001-12-03 01:55:38 +0000947 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000948 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000950 /* There's at least enough room for a hex escape
951 and a closing quote. */
952 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000953 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000954 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000955 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000956 else if (c == '\t')
957 *p++ = '\\', *p++ = 't';
958 else if (c == '\n')
959 *p++ = '\\', *p++ = 'n';
960 else if (c == '\r')
961 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000962 else if (c < ' ' || c >= 0x7f) {
963 /* For performance, we don't want to call
964 PyOS_snprintf here (extra layers of
965 function call). */
966 sprintf(p, "\\x%02x", c & 0xff);
967 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000968 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000969 else
970 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000972 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000973 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000976 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000977 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979}
980
Guido van Rossum189f1df2001-05-01 16:51:53 +0000981static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000982string_repr(PyObject *op)
983{
984 return PyString_Repr(op, 1);
985}
986
987static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000988string_str(PyObject *s)
989{
Tim Petersc9933152001-10-16 20:18:24 +0000990 assert(PyString_Check(s));
991 if (PyString_CheckExact(s)) {
992 Py_INCREF(s);
993 return s;
994 }
995 else {
996 /* Subtype -- return genuine string with the same value. */
997 PyStringObject *t = (PyStringObject *) s;
998 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
999 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001000}
1001
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001003string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001004{
1005 return a->ob_size;
1006}
1007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001009string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010{
Andrew Dalke598710c2006-05-25 18:18:39 +00001011 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001012 register PyStringObject *op;
1013 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001014#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001015 if (PyUnicode_Check(bb))
1016 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001017#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001018 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +00001019 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +00001020 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021 return NULL;
1022 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001024 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +00001025 if ((a->ob_size == 0 || b->ob_size == 0) &&
1026 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1027 if (a->ob_size == 0) {
1028 Py_INCREF(bb);
1029 return bb;
1030 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001031 Py_INCREF(a);
1032 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001033 }
1034 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +00001035 if (size < 0) {
1036 PyErr_SetString(PyExc_OverflowError,
1037 "strings are too large to concat");
1038 return NULL;
1039 }
1040
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001041 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001042 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001043 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001044 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001045 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001046 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001047 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001048 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1049 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001050 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001051 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001052#undef b
1053}
1054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001056string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001057{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001058 register Py_ssize_t i;
1059 register Py_ssize_t j;
1060 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001061 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001062 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063 if (n < 0)
1064 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001065 /* watch out for overflows: the size can overflow int,
1066 * and the # of bytes needed can overflow size_t
1067 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001068 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001069 if (n && size / n != a->ob_size) {
1070 PyErr_SetString(PyExc_OverflowError,
1071 "repeated string is too long");
1072 return NULL;
1073 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001074 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001075 Py_INCREF(a);
1076 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001077 }
Tim Peterse7c05322004-06-27 17:24:49 +00001078 nbytes = (size_t)size;
1079 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001080 PyErr_SetString(PyExc_OverflowError,
1081 "repeated string is too long");
1082 return NULL;
1083 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001084 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001085 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001086 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001087 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001088 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001089 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001090 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001091 op->ob_sval[size] = '\0';
1092 if (a->ob_size == 1 && n > 0) {
1093 memset(op->ob_sval, a->ob_sval[0] , n);
1094 return (PyObject *) op;
1095 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001096 i = 0;
1097 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001098 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1099 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001100 }
1101 while (i < size) {
1102 j = (i <= size-i) ? i : size-i;
1103 memcpy(op->ob_sval+i, op->ob_sval, j);
1104 i += j;
1105 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001106 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001107}
1108
1109/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1110
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001111static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001112string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001113 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001114 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115{
1116 if (i < 0)
1117 i = 0;
1118 if (j < 0)
1119 j = 0; /* Avoid signed/unsigned bug in next line */
1120 if (j > a->ob_size)
1121 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001122 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1123 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001124 Py_INCREF(a);
1125 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001126 }
1127 if (j < i)
1128 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001129 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001130}
1131
Guido van Rossum9284a572000-03-07 15:53:43 +00001132static int
Fred Drakeba096332000-07-09 07:04:36 +00001133string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001134{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001135 char *s = PyString_AS_STRING(a);
1136 const char *sub = PyString_AS_STRING(el);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001137 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001138#ifdef USE_FAST
1139 Py_ssize_t pos;
1140#else
1141 char *last;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001142 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001143 char firstchar, lastchar;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001144#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001145
1146 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001147#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001148 if (PyUnicode_Check(el))
1149 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001150#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001151 if (!PyString_Check(el)) {
1152 PyErr_SetString(PyExc_TypeError,
1153 "'in <string>' requires string as left operand");
1154 return -1;
1155 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001156 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001157
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001158 if (len_sub == 0)
1159 return 1;
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001160
1161#ifdef USE_FAST
1162 pos = fastsearch(
1163 s, PyString_GET_SIZE(a),
1164 sub, len_sub, FAST_SEARCH
1165 );
1166 return (pos != -1);
1167#else
Tim Petersae1d0c92006-03-17 03:29:34 +00001168 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001169 substring. When s<last, there is still room for a possible match
1170 and s[0] through s[len_sub-1] will be in bounds.
1171 shortsub is len_sub minus the last character which is checked
1172 separately just before the memcmp(). That check helps prevent
1173 false starts and saves the setup time for memcmp().
1174 */
1175 firstchar = sub[0];
1176 shortsub = len_sub - 1;
1177 lastchar = sub[shortsub];
1178 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1179 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001180 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001181 if (s == NULL)
1182 return 0;
1183 assert(s < last);
1184 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001185 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001186 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001187 }
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001188#endif
Guido van Rossum9284a572000-03-07 15:53:43 +00001189 return 0;
1190}
1191
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001192static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001193string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001194{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001195 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001196 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001197 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001198 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001199 return NULL;
1200 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001201 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001202 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001203 if (v == NULL)
1204 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001205 else {
1206#ifdef COUNT_ALLOCS
1207 one_strings++;
1208#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001209 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001210 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001211 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001212}
1213
Martin v. Löwiscd353062001-05-24 16:56:35 +00001214static PyObject*
1215string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001216{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001217 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001218 Py_ssize_t len_a, len_b;
1219 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001220 PyObject *result;
1221
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001222 /* Make sure both arguments are strings. */
1223 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001224 result = Py_NotImplemented;
1225 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001226 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001227 if (a == b) {
1228 switch (op) {
1229 case Py_EQ:case Py_LE:case Py_GE:
1230 result = Py_True;
1231 goto out;
1232 case Py_NE:case Py_LT:case Py_GT:
1233 result = Py_False;
1234 goto out;
1235 }
1236 }
1237 if (op == Py_EQ) {
1238 /* Supporting Py_NE here as well does not save
1239 much time, since Py_NE is rarely used. */
1240 if (a->ob_size == b->ob_size
1241 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001242 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001243 a->ob_size) == 0)) {
1244 result = Py_True;
1245 } else {
1246 result = Py_False;
1247 }
1248 goto out;
1249 }
1250 len_a = a->ob_size; len_b = b->ob_size;
1251 min_len = (len_a < len_b) ? len_a : len_b;
1252 if (min_len > 0) {
1253 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1254 if (c==0)
1255 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1256 }else
1257 c = 0;
1258 if (c == 0)
1259 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1260 switch (op) {
1261 case Py_LT: c = c < 0; break;
1262 case Py_LE: c = c <= 0; break;
1263 case Py_EQ: assert(0); break; /* unreachable */
1264 case Py_NE: c = c != 0; break;
1265 case Py_GT: c = c > 0; break;
1266 case Py_GE: c = c >= 0; break;
1267 default:
1268 result = Py_NotImplemented;
1269 goto out;
1270 }
1271 result = c ? Py_True : Py_False;
1272 out:
1273 Py_INCREF(result);
1274 return result;
1275}
1276
1277int
1278_PyString_Eq(PyObject *o1, PyObject *o2)
1279{
1280 PyStringObject *a, *b;
1281 a = (PyStringObject*)o1;
1282 b = (PyStringObject*)o2;
1283 return a->ob_size == b->ob_size
1284 && *a->ob_sval == *b->ob_sval
1285 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001286}
1287
Guido van Rossum9bfef441993-03-29 10:43:31 +00001288static long
Fred Drakeba096332000-07-09 07:04:36 +00001289string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001290{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001291 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001292 register unsigned char *p;
1293 register long x;
1294
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001295 if (a->ob_shash != -1)
1296 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001297 len = a->ob_size;
1298 p = (unsigned char *) a->ob_sval;
1299 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001300 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001301 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001302 x ^= a->ob_size;
1303 if (x == -1)
1304 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001305 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001306 return x;
1307}
1308
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001309#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1310
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001311static PyObject*
1312string_subscript(PyStringObject* self, PyObject* item)
1313{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001314 PyNumberMethods *nb = item->ob_type->tp_as_number;
1315 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1316 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001317 if (i == -1 && PyErr_Occurred())
1318 return NULL;
1319 if (i < 0)
1320 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001321 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001322 }
1323 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001325 char* source_buf;
1326 char* result_buf;
1327 PyObject* result;
1328
Tim Petersae1d0c92006-03-17 03:29:34 +00001329 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001330 PyString_GET_SIZE(self),
1331 &start, &stop, &step, &slicelength) < 0) {
1332 return NULL;
1333 }
1334
1335 if (slicelength <= 0) {
1336 return PyString_FromStringAndSize("", 0);
1337 }
1338 else {
1339 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001340 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001341 if (result_buf == NULL)
1342 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001343
Tim Petersae1d0c92006-03-17 03:29:34 +00001344 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001345 cur += step, i++) {
1346 result_buf[i] = source_buf[cur];
1347 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001348
1349 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001350 slicelength);
1351 PyMem_Free(result_buf);
1352 return result;
1353 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001354 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001355 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001356 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001357 "string indices must be integers");
1358 return NULL;
1359 }
1360}
1361
Martin v. Löwis18e16552006-02-15 17:27:45 +00001362static Py_ssize_t
1363string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001364{
1365 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001366 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001367 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001368 return -1;
1369 }
1370 *ptr = (void *)self->ob_sval;
1371 return self->ob_size;
1372}
1373
Martin v. Löwis18e16552006-02-15 17:27:45 +00001374static Py_ssize_t
1375string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001376{
Guido van Rossum045e6881997-09-08 18:30:11 +00001377 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001378 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001379 return -1;
1380}
1381
Martin v. Löwis18e16552006-02-15 17:27:45 +00001382static Py_ssize_t
1383string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001384{
1385 if ( lenp )
1386 *lenp = self->ob_size;
1387 return 1;
1388}
1389
Martin v. Löwis18e16552006-02-15 17:27:45 +00001390static Py_ssize_t
1391string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001392{
1393 if ( index != 0 ) {
1394 PyErr_SetString(PyExc_SystemError,
1395 "accessing non-existent string segment");
1396 return -1;
1397 }
1398 *ptr = self->ob_sval;
1399 return self->ob_size;
1400}
1401
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001402static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001403 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001404 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001405 (ssizeargfunc)string_repeat, /*sq_repeat*/
1406 (ssizeargfunc)string_item, /*sq_item*/
1407 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001408 0, /*sq_ass_item*/
1409 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001410 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001411};
1412
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001413static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001414 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001415 (binaryfunc)string_subscript,
1416 0,
1417};
1418
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001419static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001420 (readbufferproc)string_buffer_getreadbuf,
1421 (writebufferproc)string_buffer_getwritebuf,
1422 (segcountproc)string_buffer_getsegcount,
1423 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001424};
1425
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426
1427
1428#define LEFTSTRIP 0
1429#define RIGHTSTRIP 1
1430#define BOTHSTRIP 2
1431
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001432/* Arrays indexed by above */
/* Argument format strings, one per strip variant, in the order
   LEFTSTRIP, RIGHTSTRIP, BOTHSTRIP. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name only: the format string with its 3-character "|O:"
   prefix skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001436
Andrew Dalke525eab32006-05-26 14:00:45 +00001437
1438/* Overallocate the initial list to reduce the number of reallocs for small
1439 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1440 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1441 text (roughly 11 words per line) and field delimited data (usually 1-10
1442 fields). For large strings the split algorithms are bandwidth limited
1443 so increasing the preallocation likely will not improve things.*/
1444
/* Upper bound on the number of list slots preallocated for a split. */
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to
   `maxsplit` splits: maxsplit+1 elements (5 splits gives 6 elements),
   capped at MAX_PREALLOC.  The argument is parenthesized so that an
   expression argument is evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1450
/* Append data[left:right] as a new string to `list`.  Expects the
   enclosing function to declare `str` and `list` and to provide an
   `onError` label, which is jumped to on any failure.  Expands to
   several statements, so it must not be used as an unbraced
   if/else body. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
1462
/* Add data[left:right] as a new string to `list` at position `count`,
   then increment `count`.  The first MAX_PREALLOC items are stored
   directly into the list's slots; later ones are appended.
   Expects the enclosing function to declare `str`, `list` and `count`
   and to provide an `onError` label, which is jumped to on failure.
   Expands to several statements, so it must not be used as an unbraced
   if/else body. */
#define SPLIT_ADD(data, left, right)                            \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++;
1479
/* Always force the list to the expected size: sets the list's ob_size
   to the enclosing function's `count`.  The argument is parenthesized
   so the cast applies to the whole expression.  The trailing semicolon
   is part of the macro, as before. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count;
1482
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001483static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001484split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485{
Andrew Dalke525eab32006-05-26 14:00:45 +00001486 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001487 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001488 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489
1490 if (list == NULL)
1491 return NULL;
1492
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493 for (i = j = 0; i < len; ) {
1494 while (i < len && isspace(Py_CHARMASK(s[i])))
1495 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001497 while (i < len && !isspace(Py_CHARMASK(s[i])))
1498 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001500 if (maxsplit-- <= 0)
1501 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001502 SPLIT_ADD(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001503 while (i < len && isspace(Py_CHARMASK(s[i])))
1504 i++;
1505 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 }
1507 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001508 if (j < len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001509 SPLIT_ADD(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001511 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001513 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514 Py_DECREF(list);
1515 return NULL;
1516}
1517
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001518static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001519split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001520{
Andrew Dalke525eab32006-05-26 14:00:45 +00001521 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001522 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001523 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001524
1525 if (list == NULL)
1526 return NULL;
1527
1528 for (i = j = 0; i < len; ) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001529 /* TODO: Use findchar/memchr for this? */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001530 if (s[i] == ch) {
1531 if (maxcount-- <= 0)
1532 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001533 SPLIT_ADD(s, j, i);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001534 i = j = i + 1;
1535 } else
1536 i++;
1537 }
1538 if (j <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001539 SPLIT_ADD(s, j, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001540 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001541 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001542 return list;
1543
1544 onError:
1545 Py_DECREF(list);
1546 return NULL;
1547}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001549PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001550"S.split([sep [,maxsplit]]) -> list of strings\n\
1551\n\
1552Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001553delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001554splits are done. If sep is not specified or is None, any\n\
1555whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556
1557static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001558string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001559{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001560 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001561 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001562 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001563 PyObject *list, *str, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564
Martin v. Löwis9c830762006-04-13 08:37:17 +00001565 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001568 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 if (PyString_Check(subobj)) {
1572 sub = PyString_AS_STRING(subobj);
1573 n = PyString_GET_SIZE(subobj);
1574 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001575#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576 else if (PyUnicode_Check(subobj))
1577 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001578#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001579 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1580 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001581
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 if (n == 0) {
1583 PyErr_SetString(PyExc_ValueError, "empty separator");
1584 return NULL;
1585 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001586 else if (n == 1)
1587 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588
Andrew Dalke525eab32006-05-26 14:00:45 +00001589 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590 if (list == NULL)
1591 return NULL;
1592
1593 i = j = 0;
1594 while (i+n <= len) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001595 /* TODO: Use Py_STRING_MATCH */
Fred Drake396f6e02000-06-20 15:47:54 +00001596 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001597 if (maxsplit-- <= 0)
1598 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001599 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601 }
1602 else
1603 i++;
1604 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001605 SPLIT_ADD(s, j, len);
1606 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607 return list;
1608
Andrew Dalke525eab32006-05-26 14:00:45 +00001609 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610 Py_DECREF(list);
1611 return NULL;
1612}
1613
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001614PyDoc_STRVAR(partition__doc__,
1615"S.partition(sep) -> (head, sep, tail)\n\
1616\n\
1617Searches for the separator sep in S, and returns the part before it,\n\
1618the separator itself, and the part after it. If the separator is not\n\
1619found, returns S and two empty strings.");
1620
1621static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001622string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001623{
1624 Py_ssize_t len = PyString_GET_SIZE(self), sep_len, pos;
1625 const char *str = PyString_AS_STRING(self), *sep;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001626 PyObject * out;
1627
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001628 if (PyString_Check(sep_obj)) {
1629 sep = PyString_AS_STRING(sep_obj);
1630 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001631 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001632#ifdef Py_USING_UNICODE
1633 else if (PyUnicode_Check(sep_obj))
1634 return PyUnicode_Partition((PyObject *)self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001635#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001636 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001637 return NULL;
1638
1639 if (sep_len == 0) {
1640 PyErr_SetString(PyExc_ValueError, "empty separator");
1641 return NULL;
1642 }
1643
1644 out = PyTuple_New(3);
1645 if (!out)
1646 return NULL;
1647
1648 pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH);
1649 if (pos < 0) {
1650 Py_INCREF(self);
1651 PyTuple_SET_ITEM(out, 0, (PyObject*) self);
1652 Py_INCREF(nullstring);
1653 PyTuple_SET_ITEM(out, 1, (PyObject*) nullstring);
1654 Py_INCREF(nullstring);
1655 PyTuple_SET_ITEM(out, 2, (PyObject*) nullstring);
1656 } else {
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001657 PyObject* obj;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001658 PyTuple_SET_ITEM(out, 0, PyString_FromStringAndSize(str, pos));
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001659 Py_INCREF(sep_obj);
1660 PyTuple_SET_ITEM(out, 1, sep_obj);
1661 pos += sep_len;
1662 obj = PyString_FromStringAndSize(str + pos, len - pos);
1663 PyTuple_SET_ITEM(out, 2, obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001664 if (PyErr_Occurred()) {
1665 Py_DECREF(out);
1666 return NULL;
1667 }
1668 }
1669
1670 return out;
1671}
1672
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001673static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001674rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001675{
Andrew Dalke525eab32006-05-26 14:00:45 +00001676 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001677 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001678 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679
1680 if (list == NULL)
1681 return NULL;
1682
1683 for (i = j = len - 1; i >= 0; ) {
1684 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1685 i--;
1686 j = i;
1687 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1688 i--;
1689 if (j > i) {
1690 if (maxsplit-- <= 0)
1691 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001692 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001693 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1694 i--;
1695 j = i;
1696 }
1697 }
1698 if (j >= 0) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001699 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001701 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001702 if (PyList_Reverse(list) < 0)
1703 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001704 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001705 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001706 Py_DECREF(list);
1707 return NULL;
1708}
1709
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001710static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001711rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001712{
Andrew Dalke525eab32006-05-26 14:00:45 +00001713 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001714 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001715 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001716
1717 if (list == NULL)
1718 return NULL;
1719
1720 for (i = j = len - 1; i >= 0; ) {
1721 if (s[i] == ch) {
1722 if (maxcount-- <= 0)
1723 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001724 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001725 j = i = i - 1;
1726 } else
1727 i--;
1728 }
1729 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001730 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001731 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001732 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001733 if (PyList_Reverse(list) < 0)
1734 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001735 return list;
1736
1737 onError:
1738 Py_DECREF(list);
1739 return NULL;
1740}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001741
1742PyDoc_STRVAR(rsplit__doc__,
1743"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1744\n\
1745Return a list of the words in the string S, using sep as the\n\
1746delimiter string, starting at the end of the string and working\n\
1747to the front. If maxsplit is given, at most maxsplit splits are\n\
1748done. If sep is not specified or is None, any whitespace string\n\
1749is a separator.");
1750
1751static PyObject *
1752string_rsplit(PyStringObject *self, PyObject *args)
1753{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001754 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001755 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001756 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001757 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001758
Martin v. Löwis9c830762006-04-13 08:37:17 +00001759 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001760 return NULL;
1761 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001762 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001763 if (subobj == Py_None)
1764 return rsplit_whitespace(s, len, maxsplit);
1765 if (PyString_Check(subobj)) {
1766 sub = PyString_AS_STRING(subobj);
1767 n = PyString_GET_SIZE(subobj);
1768 }
1769#ifdef Py_USING_UNICODE
1770 else if (PyUnicode_Check(subobj))
1771 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1772#endif
1773 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1774 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001775
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001776 if (n == 0) {
1777 PyErr_SetString(PyExc_ValueError, "empty separator");
1778 return NULL;
1779 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001780 else if (n == 1)
1781 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001782
Andrew Dalke525eab32006-05-26 14:00:45 +00001783 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001784 if (list == NULL)
1785 return NULL;
1786
1787 j = len;
1788 i = j - n;
1789 while (i >= 0) {
1790 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1791 if (maxsplit-- <= 0)
1792 break;
Andrew Dalke525eab32006-05-26 14:00:45 +00001793 SPLIT_ADD(s, i+n, j);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001794 j = i;
1795 i -= n;
1796 }
1797 else
1798 i--;
1799 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001800 SPLIT_ADD(s, 0, j);
1801 FIX_PREALLOC_SIZE(list);
1802 if (PyList_Reverse(list) < 0)
1803 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001804 return list;
1805
Andrew Dalke525eab32006-05-26 14:00:45 +00001806onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001807 Py_DECREF(list);
1808 return NULL;
1809}
1810
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001812PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813"S.join(sequence) -> string\n\
1814\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001815Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001816sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817
1818static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001819string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820{
1821 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001822 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001825 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001826 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001827 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001828 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829
Tim Peters19fe14e2001-01-19 03:03:47 +00001830 seq = PySequence_Fast(orig, "");
1831 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001832 return NULL;
1833 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001834
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001835 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001836 if (seqlen == 0) {
1837 Py_DECREF(seq);
1838 return PyString_FromString("");
1839 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001841 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001842 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1843 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001844 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001845 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001846 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001848
Raymond Hettinger674f2412004-08-23 23:23:54 +00001849 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001850 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001851 * Do a pre-pass to figure out the total amount of space we'll
1852 * need (sz), see whether any argument is absurd, and defer to
1853 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001854 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001855 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001856 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001857 item = PySequence_Fast_GET_ITEM(seq, i);
1858 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001859#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001860 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001861 /* Defer to Unicode join.
1862 * CAUTION: There's no gurantee that the
1863 * original sequence can be iterated over
1864 * again, so we must pass seq here.
1865 */
1866 PyObject *result;
1867 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001868 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001869 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001870 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001871#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001872 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001873 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001874 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001875 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001876 Py_DECREF(seq);
1877 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001878 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001879 sz += PyString_GET_SIZE(item);
1880 if (i != 0)
1881 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001882 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001883 PyErr_SetString(PyExc_OverflowError,
1884 "join() is too long for a Python string");
1885 Py_DECREF(seq);
1886 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001888 }
1889
1890 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001891 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001892 if (res == NULL) {
1893 Py_DECREF(seq);
1894 return NULL;
1895 }
1896
1897 /* Catenate everything. */
1898 p = PyString_AS_STRING(res);
1899 for (i = 0; i < seqlen; ++i) {
1900 size_t n;
1901 item = PySequence_Fast_GET_ITEM(seq, i);
1902 n = PyString_GET_SIZE(item);
1903 memcpy(p, PyString_AS_STRING(item), n);
1904 p += n;
1905 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001906 memcpy(p, sep, seplen);
1907 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001908 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001910
Jeremy Hylton49048292000-07-11 03:28:17 +00001911 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913}
1914
Tim Peters52e155e2001-06-16 05:42:57 +00001915PyObject *
1916_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001917{
Tim Petersa7259592001-06-16 05:11:17 +00001918 assert(sep != NULL && PyString_Check(sep));
1919 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001920 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001921}
1922
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001923static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001924string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001925{
1926 if (*end > len)
1927 *end = len;
1928 else if (*end < 0)
1929 *end += len;
1930 if (*end < 0)
1931 *end = 0;
1932 if (*start < 0)
1933 *start += len;
1934 if (*start < 0)
1935 *start = 0;
1936}
1937
Martin v. Löwis18e16552006-02-15 17:27:45 +00001938static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001939string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001941 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001942 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001943 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001944 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001947 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001948 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 return -2;
1950 if (PyString_Check(subobj)) {
1951 sub = PyString_AS_STRING(subobj);
1952 n = PyString_GET_SIZE(subobj);
1953 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001954#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001955 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001956 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001957#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001958 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 return -2;
1960
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001961 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962
Fredrik Lundhc3434b32006-05-25 18:44:29 +00001963#ifdef USE_FAST
1964 if (n == 0)
1965 return (dir > 0) ? i : last;
1966 if (dir > 0) {
1967 Py_ssize_t pos = fastsearch(s + i, last - i, sub, n,
1968 FAST_SEARCH);
1969 if (pos < 0)
1970 return pos;
1971 return pos + i;
1972 }
1973#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001974 if (dir > 0) {
1975 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977 last -= n;
1978 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001979 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001980 return (long)i;
1981 }
1982 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001983 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001984
Guido van Rossum4c08d552000-03-10 22:55:18 +00001985 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001986 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001987 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001988 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001989 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001990 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001991
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992 return -1;
1993}
1994
1995
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001996PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997"S.find(sub [,start [,end]]) -> int\n\
1998\n\
1999Return the lowest index in S where substring sub is found,\n\
2000such that sub is contained within s[start,end]. Optional\n\
2001arguments start and end are interpreted as in slice notation.\n\
2002\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
2005static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002006string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002008 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009 if (result == -2)
2010 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012}
2013
2014
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002015PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016"S.index(sub [,start [,end]]) -> int\n\
2017\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002018Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019
2020static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002021string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002023 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024 if (result == -2)
2025 return NULL;
2026 if (result == -1) {
2027 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002028 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 return NULL;
2030 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002031 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032}
2033
2034
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002035PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036"S.rfind(sub [,start [,end]]) -> int\n\
2037\n\
2038Return the highest index in S where substring sub is found,\n\
2039such that sub is contained within s[start,end]. Optional\n\
2040arguments start and end are interpreted as in slice notation.\n\
2041\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002042Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043
2044static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002045string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002047 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 if (result == -2)
2049 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002050 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051}
2052
2053
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002054PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002055"S.rindex(sub [,start [,end]]) -> int\n\
2056\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002057Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058
2059static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002060string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002062 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063 if (result == -2)
2064 return NULL;
2065 if (result == -1) {
2066 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002067 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002068 return NULL;
2069 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002070 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071}
2072
2073
2074static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002075do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2076{
2077 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002078 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002080 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2081 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002082
2083 i = 0;
2084 if (striptype != RIGHTSTRIP) {
2085 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2086 i++;
2087 }
2088 }
2089
2090 j = len;
2091 if (striptype != LEFTSTRIP) {
2092 do {
2093 j--;
2094 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2095 j++;
2096 }
2097
2098 if (i == 0 && j == len && PyString_CheckExact(self)) {
2099 Py_INCREF(self);
2100 return (PyObject*)self;
2101 }
2102 else
2103 return PyString_FromStringAndSize(s+i, j-i);
2104}
2105
2106
2107static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002108do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109{
2110 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002111 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113 i = 0;
2114 if (striptype != RIGHTSTRIP) {
2115 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2116 i++;
2117 }
2118 }
2119
2120 j = len;
2121 if (striptype != LEFTSTRIP) {
2122 do {
2123 j--;
2124 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2125 j++;
2126 }
2127
Tim Peters8fa5dd02001-09-12 02:18:30 +00002128 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129 Py_INCREF(self);
2130 return (PyObject*)self;
2131 }
2132 else
2133 return PyString_FromStringAndSize(s+i, j-i);
2134}
2135
2136
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002137static PyObject *
2138do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2139{
2140 PyObject *sep = NULL;
2141
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002142 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002143 return NULL;
2144
2145 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002146 if (PyString_Check(sep))
2147 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002148#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002149 else if (PyUnicode_Check(sep)) {
2150 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2151 PyObject *res;
2152 if (uniself==NULL)
2153 return NULL;
2154 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2155 striptype, sep);
2156 Py_DECREF(uniself);
2157 return res;
2158 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002159#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002160 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002161#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002162 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002163#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002164 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002165#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002166 STRIPNAME(striptype));
2167 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002168 }
2169
2170 return do_strip(self, striptype);
2171}
2172
2173
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002174PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002175"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176\n\
2177Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002178whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002179If chars is given and not None, remove characters in chars instead.\n\
2180If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181
2182static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002183string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002185 if (PyTuple_GET_SIZE(args) == 0)
2186 return do_strip(self, BOTHSTRIP); /* Common case */
2187 else
2188 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189}
2190
2191
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002192PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002193"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002195Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002196If chars is given and not None, remove characters in chars instead.\n\
2197If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198
2199static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002200string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002201{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002202 if (PyTuple_GET_SIZE(args) == 0)
2203 return do_strip(self, LEFTSTRIP); /* Common case */
2204 else
2205 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206}
2207
2208
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002209PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002210"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002212Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002213If chars is given and not None, remove characters in chars instead.\n\
2214If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215
2216static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002217string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002219 if (PyTuple_GET_SIZE(args) == 0)
2220 return do_strip(self, RIGHTSTRIP); /* Common case */
2221 else
2222 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223}
2224
2225
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002226PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227"S.lower() -> string\n\
2228\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002229Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002231/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2232#ifndef _tolower
2233#define _tolower tolower
2234#endif
2235
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002237string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002239 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002240 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002241 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002242
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002243 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002244 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002246
2247 s = PyString_AS_STRING(newobj);
2248
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002249 memcpy(s, PyString_AS_STRING(self), n);
2250
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002252 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002253 if (isupper(c))
2254 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002256
Anthony Baxtera6286212006-04-11 07:42:36 +00002257 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258}
2259
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002260PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261"S.upper() -> string\n\
2262\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002263Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002265#ifndef _toupper
2266#define _toupper toupper
2267#endif
2268
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002270string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002272 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002273 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002274 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002276 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002277 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002279
2280 s = PyString_AS_STRING(newobj);
2281
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002282 memcpy(s, PyString_AS_STRING(self), n);
2283
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002284 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002285 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002286 if (islower(c))
2287 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002289
Anthony Baxtera6286212006-04-11 07:42:36 +00002290 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291}
2292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002293PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294"S.title() -> string\n\
2295\n\
2296Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002297characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298
2299static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002300string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301{
2302 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002303 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002305 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306
Anthony Baxtera6286212006-04-11 07:42:36 +00002307 newobj = PyString_FromStringAndSize(NULL, n);
2308 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002310 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311 for (i = 0; i < n; i++) {
2312 int c = Py_CHARMASK(*s++);
2313 if (islower(c)) {
2314 if (!previous_is_cased)
2315 c = toupper(c);
2316 previous_is_cased = 1;
2317 } else if (isupper(c)) {
2318 if (previous_is_cased)
2319 c = tolower(c);
2320 previous_is_cased = 1;
2321 } else
2322 previous_is_cased = 0;
2323 *s_new++ = c;
2324 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002325 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326}
2327
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002328PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329"S.capitalize() -> string\n\
2330\n\
2331Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002332capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333
2334static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002335string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336{
2337 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002338 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002339 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340
Anthony Baxtera6286212006-04-11 07:42:36 +00002341 newobj = PyString_FromStringAndSize(NULL, n);
2342 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002344 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345 if (0 < n) {
2346 int c = Py_CHARMASK(*s++);
2347 if (islower(c))
2348 *s_new = toupper(c);
2349 else
2350 *s_new = c;
2351 s_new++;
2352 }
2353 for (i = 1; i < n; i++) {
2354 int c = Py_CHARMASK(*s++);
2355 if (isupper(c))
2356 *s_new = tolower(c);
2357 else
2358 *s_new = c;
2359 s_new++;
2360 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002361 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362}
2363
2364
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002365PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366"S.count(sub[, start[, end]]) -> int\n\
2367\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002368Return the number of non-overlapping occurrences of substring sub in\n\
2369string S[start:end]. Optional arguments start and end are interpreted\n\
2370as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371
2372static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002373string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374{
Fredrik Lundhaf722372006-05-25 17:55:31 +00002375 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002376 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002377 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002378 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002379 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380
Guido van Rossumc6821402000-05-08 14:08:05 +00002381 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2382 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002384
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385 if (PyString_Check(subobj)) {
2386 sub = PyString_AS_STRING(subobj);
2387 n = PyString_GET_SIZE(subobj);
2388 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002389#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002390 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002391 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002392 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2393 if (count == -1)
2394 return NULL;
2395 else
2396 return PyInt_FromLong((long) count);
2397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002398#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002399 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2400 return NULL;
2401
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002402 string_adjust_indices(&i, &last, len);
2403
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404 m = last + 1 - n;
2405 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002406 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407
Fredrik Lundhaf722372006-05-25 17:55:31 +00002408#ifdef USE_FAST
2409 r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
2410 if (r < 0)
2411 r = 0; /* no match */
2412#else
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 r = 0;
2414 while (i < m) {
Fredrik Lundhaf722372006-05-25 17:55:31 +00002415 const char *t
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416 if (!memcmp(s+i, sub, n)) {
2417 r++;
2418 i += n;
2419 } else {
2420 i++;
2421 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002422 if (i >= m)
2423 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002424 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002425 if (t == NULL)
2426 break;
2427 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 }
Fredrik Lundhaf722372006-05-25 17:55:31 +00002429#endif
Martin v. Löwis18e16552006-02-15 17:27:45 +00002430 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431}
2432
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002433PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434"S.swapcase() -> string\n\
2435\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002437converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438
2439static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002440string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441{
2442 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002443 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002444 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002445
Anthony Baxtera6286212006-04-11 07:42:36 +00002446 newobj = PyString_FromStringAndSize(NULL, n);
2447 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002448 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002449 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002450 for (i = 0; i < n; i++) {
2451 int c = Py_CHARMASK(*s++);
2452 if (islower(c)) {
2453 *s_new = toupper(c);
2454 }
2455 else if (isupper(c)) {
2456 *s_new = tolower(c);
2457 }
2458 else
2459 *s_new = c;
2460 s_new++;
2461 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002462 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463}
2464
2465
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002466PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002467"S.translate(table [,deletechars]) -> string\n\
2468\n\
2469Return a copy of the string S, where all characters occurring\n\
2470in the optional argument deletechars are removed, and the\n\
2471remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002472translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002473
2474static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002475string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002477 register char *input, *output;
2478 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002479 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002481 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002482 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483 PyObject *result;
2484 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002485 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002486
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002487 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002488 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002490
2491 if (PyString_Check(tableobj)) {
2492 table1 = PyString_AS_STRING(tableobj);
2493 tablen = PyString_GET_SIZE(tableobj);
2494 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002495#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002497 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002498 parameter; instead a mapping to None will cause characters
2499 to be deleted. */
2500 if (delobj != NULL) {
2501 PyErr_SetString(PyExc_TypeError,
2502 "deletions are implemented differently for unicode");
2503 return NULL;
2504 }
2505 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2506 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002507#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002508 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002509 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510
Martin v. Löwis00b61272002-12-12 20:03:19 +00002511 if (tablen != 256) {
2512 PyErr_SetString(PyExc_ValueError,
2513 "translation table must be 256 characters long");
2514 return NULL;
2515 }
2516
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 if (delobj != NULL) {
2518 if (PyString_Check(delobj)) {
2519 del_table = PyString_AS_STRING(delobj);
2520 dellen = PyString_GET_SIZE(delobj);
2521 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002522#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002523 else if (PyUnicode_Check(delobj)) {
2524 PyErr_SetString(PyExc_TypeError,
2525 "deletions are implemented differently for unicode");
2526 return NULL;
2527 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002528#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2530 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002531 }
2532 else {
2533 del_table = NULL;
2534 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535 }
2536
2537 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002538 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539 result = PyString_FromStringAndSize((char *)NULL, inlen);
2540 if (result == NULL)
2541 return NULL;
2542 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002543 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002544
2545 if (dellen == 0) {
2546 /* If no deletions are required, use faster code */
2547 for (i = inlen; --i >= 0; ) {
2548 c = Py_CHARMASK(*input++);
2549 if (Py_CHARMASK((*output++ = table[c])) != c)
2550 changed = 1;
2551 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002552 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002553 return result;
2554 Py_DECREF(result);
2555 Py_INCREF(input_obj);
2556 return input_obj;
2557 }
2558
2559 for (i = 0; i < 256; i++)
2560 trans_table[i] = Py_CHARMASK(table[i]);
2561
2562 for (i = 0; i < dellen; i++)
2563 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2564
2565 for (i = inlen; --i >= 0; ) {
2566 c = Py_CHARMASK(*input++);
2567 if (trans_table[c] != -1)
2568 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2569 continue;
2570 changed = 1;
2571 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002572 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002573 Py_DECREF(result);
2574 Py_INCREF(input_obj);
2575 return input_obj;
2576 }
2577 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002578 if (inlen > 0)
2579 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002580 return result;
2581}
2582
2583
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* True when the `length` bytes starting at target+offset equal `pattern`.
   The first and last bytes are compared directly and memcmp() checks the
   bytes in between.
   Don't call if length < 2: memcmp() would be handed a negative count.
   Every argument is parenthesized so expressions may be passed, but the
   arguments are evaluated more than once and must have no side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
	((target)[(offset)] == (pattern)[0] && \
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* Pointer to the first occurrence of byte `c` within the first
   `target_len` bytes of `target`, or NULL if there is none. */
#define findchar(target, target_len, c) \
	((char *)memchr((const void *)(target), (c), (target_len)))
2597
2598/* String ops must return a string. */
2599/* If the object is subclass of string, create a copy */
2600static PyStringObject *
2601return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002602{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002603 if (PyString_CheckExact(self)) {
2604 Py_INCREF(self);
2605 return self;
2606 }
2607 return (PyStringObject *)PyString_FromStringAndSize(
2608 PyString_AS_STRING(self),
2609 PyString_GET_SIZE(self));
2610}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002611
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002612static Py_ssize_t
2613countchar(char *target, int target_len, char c)
2614{
2615 Py_ssize_t count=0;
2616 char *start=target;
2617 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002619 while ( (start=findchar(start, end-start, c)) != NULL ) {
2620 count++;
2621 start += 1;
2622 }
2623
2624 return count;
2625}
2626
2627static Py_ssize_t
2628findstring(char *target, Py_ssize_t target_len,
2629 char *pattern, Py_ssize_t pattern_len,
2630 Py_ssize_t start,
2631 Py_ssize_t end,
2632 int direction)
2633{
2634 if (start < 0) {
2635 start += target_len;
2636 if (start < 0)
2637 start = 0;
2638 }
2639 if (end > target_len) {
2640 end = target_len;
2641 } else if (end < 0) {
2642 end += target_len;
2643 if (end < 0)
2644 end = 0;
2645 }
2646
2647 /* zero-length substrings always match at the first attempt */
2648 if (pattern_len == 0)
2649 return (direction > 0) ? start : end;
2650
2651 end -= pattern_len;
2652
2653 if (direction < 0) {
2654 for (; end >= start; end--)
2655 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2656 return end;
2657 } else {
2658 for (; start <= end; start++)
2659 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2660 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002661 }
2662 return -1;
2663}
2664
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002665Py_ssize_t
2666countstring(char *target, Py_ssize_t target_len,
2667 char *pattern, Py_ssize_t pattern_len,
2668 Py_ssize_t start,
2669 Py_ssize_t end,
2670 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002672 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002673
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002674 if (start < 0) {
2675 start += target_len;
2676 if (start < 0)
2677 start = 0;
2678 }
2679 if (end > target_len) {
2680 end = target_len;
2681 } else if (end < 0) {
2682 end += target_len;
2683 if (end < 0)
2684 end = 0;
2685 }
2686
2687 /* zero-length substrings match everywhere */
2688 if (pattern_len == 0)
2689 return target_len+1;
2690
2691 end -= pattern_len;
2692
2693 if (direction < 0) {
2694 for (; end >= start; end--)
2695 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2696 count++;
2697 end -= pattern_len-1;
2698 }
2699 } else {
2700 for (; start <= end; start++)
2701 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2702 count++;
2703 start += pattern_len-1;
2704 }
2705 }
2706 return count;
2707}
2708
2709
/* Algorithms for different cases of string replacement */
2711
2712/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2713static PyStringObject *
2714replace_interleave(PyStringObject *self,
2715 PyStringObject *to,
2716 Py_ssize_t maxcount)
2717{
2718 char *self_s, *to_s, *result_s;
2719 Py_ssize_t self_len, to_len, result_len;
2720 Py_ssize_t count, i, product;
2721 PyStringObject *result;
2722
2723 self_len = PyString_GET_SIZE(self);
2724 to_len = PyString_GET_SIZE(to);
2725
2726 /* 1 at the end plus 1 after every character */
2727 count = self_len+1;
2728 if (maxcount < count)
2729 count = maxcount;
2730
2731 /* Check for overflow */
2732 /* result_len = count * to_len + self_len; */
2733 product = count * to_len;
2734 if (product / to_len != count) {
2735 PyErr_SetString(PyExc_OverflowError,
2736 "replace string is too long");
2737 return NULL;
2738 }
2739 result_len = product + self_len;
2740 if (result_len < 0) {
2741 PyErr_SetString(PyExc_OverflowError,
2742 "replace string is too long");
2743 return NULL;
2744 }
2745
2746 if (! (result = (PyStringObject *)
2747 PyString_FromStringAndSize(NULL, result_len)) )
2748 return NULL;
2749
2750 self_s = PyString_AS_STRING(self);
2751 to_s = PyString_AS_STRING(to);
2752 to_len = PyString_GET_SIZE(to);
2753 result_s = PyString_AS_STRING(result);
2754
2755 /* TODO: special case single character, which doesn't need memcpy */
2756
2757 /* Lay the first one down (guaranteed this will occur) */
2758 memcpy(result_s, to_s, to_len);
2759 result_s += to_len;
2760 count -= 1;
2761
2762 for (i=0; i<count; i++) {
2763 *result_s++ = *self_s++;
2764 memcpy(result_s, to_s, to_len);
2765 result_s += to_len;
2766 }
2767
2768 /* Copy the rest of the original string */
2769 memcpy(result_s, self_s, self_len-i);
2770
2771 return result;
2772}
2773
2774/* Special case for deleting a single character */
2775/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2776static PyStringObject *
2777replace_delete_single_character(PyStringObject *self,
2778 char from_c, Py_ssize_t maxcount)
2779{
2780 char *self_s, *result_s;
2781 char *start, *next, *end;
2782 Py_ssize_t self_len, result_len;
2783 Py_ssize_t count;
2784 PyStringObject *result;
2785
2786 self_len = PyString_GET_SIZE(self);
2787 self_s = PyString_AS_STRING(self);
2788
2789 count = countchar(self_s, self_len, from_c);
2790 if (count == 0) {
2791 return return_self(self);
2792 }
2793 if (count > maxcount)
2794 count = maxcount;
2795
2796 result_len = self_len - count; /* from_len == 1 */
2797 assert(result_len>=0);
2798
2799 if ( (result = (PyStringObject *)
2800 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2801 return NULL;
2802 result_s = PyString_AS_STRING(result);
2803
2804 start = self_s;
2805 end = self_s + self_len;
2806 while (count-- > 0) {
2807 next = findchar(start, end-start, from_c);
2808 if (next == NULL)
2809 break;
2810 memcpy(result_s, start, next-start);
2811 result_s += (next-start);
2812 start = next+1;
2813 }
2814 memcpy(result_s, start, end-start);
2815
2816 return result;
2817}
2818
2819/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2820
2821static PyStringObject *
2822replace_delete_substring(PyStringObject *self, PyStringObject *from,
2823 Py_ssize_t maxcount) {
2824 char *self_s, *from_s, *result_s;
2825 char *start, *next, *end;
2826 Py_ssize_t self_len, from_len, result_len;
2827 Py_ssize_t count, offset;
2828 PyStringObject *result;
2829
2830 self_len = PyString_GET_SIZE(self);
2831 self_s = PyString_AS_STRING(self);
2832 from_len = PyString_GET_SIZE(from);
2833 from_s = PyString_AS_STRING(from);
2834
2835 count = countstring(self_s, self_len,
2836 from_s, from_len,
2837 0, self_len, 1);
2838
2839 if (count > maxcount)
2840 count = maxcount;
2841
2842 if (count == 0) {
2843 /* no matches */
2844 return return_self(self);
2845 }
2846
2847 result_len = self_len - (count * from_len);
2848 assert (result_len>=0);
2849
2850 if ( (result = (PyStringObject *)
2851 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2852 return NULL;
2853
2854 result_s = PyString_AS_STRING(result);
2855
2856 start = self_s;
2857 end = self_s + self_len;
2858 while (count-- > 0) {
2859 offset = findstring(start, end-start,
2860 from_s, from_len,
2861 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002862 if (offset == -1)
2863 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002864 next = start + offset;
2865
2866 memcpy(result_s, start, next-start);
2867
2868 result_s += (next-start);
2869 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002870 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002871 memcpy(result_s, start, end-start);
2872 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002873}
2874
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002875/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2876static PyStringObject *
2877replace_single_character_in_place(PyStringObject *self,
2878 char from_c, char to_c,
2879 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002880{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002881 char *self_s, *result_s, *start, *end, *next;
2882 Py_ssize_t self_len;
2883 PyStringObject *result;
2884
2885 /* The result string will be the same size */
2886 self_s = PyString_AS_STRING(self);
2887 self_len = PyString_GET_SIZE(self);
2888
2889 next = findchar(self_s, self_len, from_c);
2890
2891 if (next == NULL) {
2892 /* No matches; return the original string */
2893 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002894 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002895
2896 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002897 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 if (result == NULL)
2899 return NULL;
2900 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002901 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002902
2903 /* change everything in-place, starting with this one */
2904 start = result_s + (next-self_s);
2905 *start = to_c;
2906 start++;
2907 end = result_s + self_len;
2908
2909 while (--maxcount > 0) {
2910 next = findchar(start, end-start, from_c);
2911 if (next == NULL)
2912 break;
2913 *next = to_c;
2914 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002915 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002916
2917 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002918}
2919
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002920/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2921static PyStringObject *
2922replace_substring_in_place(PyStringObject *self,
2923 PyStringObject *from,
2924 PyStringObject *to,
2925 Py_ssize_t maxcount)
2926{
2927 char *result_s, *start, *end;
2928 char *self_s, *from_s, *to_s;
2929 Py_ssize_t self_len, from_len, offset;
2930 PyStringObject *result;
2931
2932 /* The result string will be the same size */
2933
2934 self_s = PyString_AS_STRING(self);
2935 self_len = PyString_GET_SIZE(self);
2936
2937 from_s = PyString_AS_STRING(from);
2938 from_len = PyString_GET_SIZE(from);
2939 to_s = PyString_AS_STRING(to);
2940
2941 offset = findstring(self_s, self_len,
2942 from_s, from_len,
2943 0, self_len, FORWARD);
2944
2945 if (offset == -1) {
2946 /* No matches; return the original string */
2947 return return_self(self);
2948 }
2949
2950 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002951 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002952 if (result == NULL)
2953 return NULL;
2954 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002955 memcpy(result_s, self_s, self_len);
2956
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002957
2958 /* change everything in-place, starting with this one */
2959 start = result_s + offset;
2960 memcpy(start, to_s, from_len);
2961 start += from_len;
2962 end = result_s + self_len;
2963
2964 while ( --maxcount > 0) {
2965 offset = findstring(start, end-start,
2966 from_s, from_len,
2967 0, end-start, FORWARD);
2968 if (offset==-1)
2969 break;
2970 memcpy(start+offset, to_s, from_len);
2971 start += offset+from_len;
2972 }
2973
2974 return result;
2975}
2976
2977/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2978static PyStringObject *
2979replace_single_character(PyStringObject *self,
2980 char from_c,
2981 PyStringObject *to,
2982 Py_ssize_t maxcount)
2983{
2984 char *self_s, *to_s, *result_s;
2985 char *start, *next, *end;
2986 Py_ssize_t self_len, to_len, result_len;
2987 Py_ssize_t count, product;
2988 PyStringObject *result;
2989
2990 self_s = PyString_AS_STRING(self);
2991 self_len = PyString_GET_SIZE(self);
2992
2993 count = countchar(self_s, self_len, from_c);
2994 if (count > maxcount)
2995 count = maxcount;
2996
2997 if (count == 0) {
2998 /* no matches, return unchanged */
2999 return return_self(self);
3000 }
3001
3002 to_s = PyString_AS_STRING(to);
3003 to_len = PyString_GET_SIZE(to);
3004
3005 /* use the difference between current and new, hence the "-1" */
3006 /* result_len = self_len + count * (to_len-1) */
3007 product = count * (to_len-1);
3008 if (product / (to_len-1) != count) {
3009 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010 return NULL;
3011 }
3012 result_len = self_len + product;
3013 if (result_len < 0) {
3014 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3015 return NULL;
3016 }
3017
3018 if ( (result = (PyStringObject *)
3019 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3020 return NULL;
3021 result_s = PyString_AS_STRING(result);
3022
3023 start = self_s;
3024 end = self_s + self_len;
3025 while (count-- > 0) {
3026 next = findchar(start, end-start, from_c);
3027 if (next == NULL)
3028 break;
3029
3030 if (next == start) {
3031 /* replace with the 'to' */
3032 memcpy(result_s, to_s, to_len);
3033 result_s += to_len;
3034 start += 1;
3035 } else {
3036 /* copy the unchanged old then the 'to' */
3037 memcpy(result_s, start, next-start);
3038 result_s += (next-start);
3039 memcpy(result_s, to_s, to_len);
3040 result_s += to_len;
3041 start = next+1;
3042 }
3043 }
3044 /* Copy the remainder of the remaining string */
3045 memcpy(result_s, start, end-start);
3046
3047 return result;
3048}
3049
3050/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
3051static PyStringObject *
3052replace_substring(PyStringObject *self,
3053 PyStringObject *from,
3054 PyStringObject *to,
3055 Py_ssize_t maxcount) {
3056 char *self_s, *from_s, *to_s, *result_s;
3057 char *start, *next, *end;
3058 Py_ssize_t self_len, from_len, to_len, result_len;
3059 Py_ssize_t count, offset, product;
3060 PyStringObject *result;
3061
3062 self_s = PyString_AS_STRING(self);
3063 self_len = PyString_GET_SIZE(self);
3064 from_s = PyString_AS_STRING(from);
3065 from_len = PyString_GET_SIZE(from);
3066
3067 count = countstring(self_s, self_len,
3068 from_s, from_len,
3069 0, self_len, FORWARD);
3070 if (count > maxcount)
3071 count = maxcount;
3072
3073 if (count == 0) {
3074 /* no matches, return unchanged */
3075 return return_self(self);
3076 }
3077
3078 to_s = PyString_AS_STRING(to);
3079 to_len = PyString_GET_SIZE(to);
3080
3081 /* Check for overflow */
3082 /* result_len = self_len + count * (to_len-from_len) */
3083 product = count * (to_len-from_len);
3084 if (product / (to_len-from_len) != count) {
3085 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3086 return NULL;
3087 }
3088 result_len = self_len + product;
3089 if (result_len < 0) {
3090 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3091 return NULL;
3092 }
3093
3094 if ( (result = (PyStringObject *)
3095 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3096 return NULL;
3097 result_s = PyString_AS_STRING(result);
3098
3099 start = self_s;
3100 end = self_s + self_len;
3101 while (count-- > 0) {
3102 offset = findstring(start, end-start,
3103 from_s, from_len,
3104 0, end-start, FORWARD);
3105 if (offset == -1)
3106 break;
3107 next = start+offset;
3108 if (next == start) {
3109 /* replace with the 'to' */
3110 memcpy(result_s, to_s, to_len);
3111 result_s += to_len;
3112 start += from_len;
3113 } else {
3114 /* copy the unchanged old then the 'to' */
3115 memcpy(result_s, start, next-start);
3116 result_s += (next-start);
3117 memcpy(result_s, to_s, to_len);
3118 result_s += to_len;
3119 start = next+from_len;
3120 }
3121 }
3122 /* Copy the remainder of the remaining string */
3123 memcpy(result_s, start, end-start);
3124
3125 return result;
3126}
3127
3128
3129static PyStringObject *
3130replace(PyStringObject *self,
3131 PyStringObject *from,
3132 PyStringObject *to,
3133 Py_ssize_t maxcount)
3134{
3135 Py_ssize_t from_len, to_len;
3136
3137 if (maxcount < 0) {
3138 maxcount = PY_SSIZE_T_MAX;
3139 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3140 /* nothing to do; return the original string */
3141 return return_self(self);
3142 }
3143
3144 from_len = PyString_GET_SIZE(from);
3145 to_len = PyString_GET_SIZE(to);
3146
3147 if (maxcount == 0 ||
3148 (from_len == 0 && to_len == 0)) {
3149 /* nothing to do; return the original string */
3150 return return_self(self);
3151 }
3152
3153 /* Handle zero-length special cases */
3154
3155 if (from_len == 0) {
3156 /* insert the 'to' string everywhere. */
3157 /* >>> "Python".replace("", ".") */
3158 /* '.P.y.t.h.o.n.' */
3159 return replace_interleave(self, to, maxcount);
3160 }
3161
3162 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3163 /* point for an empty self string to generate a non-empty string */
3164 /* Special case so the remaining code always gets a non-empty string */
3165 if (PyString_GET_SIZE(self) == 0) {
3166 return return_self(self);
3167 }
3168
3169 if (to_len == 0) {
3170 /* delete all occurances of 'from' string */
3171 if (from_len == 1) {
3172 return replace_delete_single_character(
3173 self, PyString_AS_STRING(from)[0], maxcount);
3174 } else {
3175 return replace_delete_substring(self, from, maxcount);
3176 }
3177 }
3178
3179 /* Handle special case where both strings have the same length */
3180
3181 if (from_len == to_len) {
3182 if (from_len == 1) {
3183 return replace_single_character_in_place(
3184 self,
3185 PyString_AS_STRING(from)[0],
3186 PyString_AS_STRING(to)[0],
3187 maxcount);
3188 } else {
3189 return replace_substring_in_place(
3190 self, from, to, maxcount);
3191 }
3192 }
3193
3194 /* Otherwise use the more generic algorithms */
3195 if (from_len == 1) {
3196 return replace_single_character(self, PyString_AS_STRING(from)[0],
3197 to, maxcount);
3198 } else {
3199 /* len('from')>=2, len('to')>=1 */
3200 return replace_substring(self, from, to, maxcount);
3201 }
3202}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003203
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003204PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003205"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003206\n\
3207Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003208old replaced by new. If the optional argument count is\n\
3209given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003210
3211static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003212string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003213{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003214 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003215 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003216 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003217 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003218
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003219 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003220 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003221
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003222 if (PyString_Check(from)) {
3223 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003224 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003225#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003226 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003227 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003228 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003229#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003230 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003231 return NULL;
3232
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003233 if (PyString_Check(to)) {
3234 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003235 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003236#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003237 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003238 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003239 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003240#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003241 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003242 return NULL;
3243
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003244 return (PyObject *)replace((PyStringObject *) self,
3245 (PyStringObject *) from,
3246 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003247}
3248
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003249/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003250
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003251PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003252"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003253\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003254Return True if S starts with the specified prefix, False otherwise.\n\
3255With optional start, test S beginning at that position.\n\
3256With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003257
3258static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003259string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003260{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003261 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003262 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003263 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003264 Py_ssize_t plen;
3265 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003266 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003267 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003268
Guido van Rossumc6821402000-05-08 14:08:05 +00003269 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3270 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003271 return NULL;
3272 if (PyString_Check(subobj)) {
3273 prefix = PyString_AS_STRING(subobj);
3274 plen = PyString_GET_SIZE(subobj);
3275 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003276#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003277 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003278 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003279 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003280 subobj, start, end, -1);
3281 if (rc == -1)
3282 return NULL;
3283 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003284 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003285 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003286#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003287 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003288 return NULL;
3289
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003290 string_adjust_indices(&start, &end, len);
3291
3292 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003293 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003294
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003295 if (end-start >= plen)
3296 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3297 else
3298 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003299}
3300
3301
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003302PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003303"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003304\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003305Return True if S ends with the specified suffix, False otherwise.\n\
3306With optional start, test S beginning at that position.\n\
3307With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003308
3309static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003310string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003311{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003312 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003313 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003314 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003315 Py_ssize_t slen;
3316 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003317 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003318 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003319
Guido van Rossumc6821402000-05-08 14:08:05 +00003320 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3321 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003322 return NULL;
3323 if (PyString_Check(subobj)) {
3324 suffix = PyString_AS_STRING(subobj);
3325 slen = PyString_GET_SIZE(subobj);
3326 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003327#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003328 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003329 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003330 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003331 subobj, start, end, +1);
3332 if (rc == -1)
3333 return NULL;
3334 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003335 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003336 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003337#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003338 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003339 return NULL;
3340
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003341 string_adjust_indices(&start, &end, len);
3342
3343 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003344 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003345
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003346 if (end-slen > start)
3347 start = end - slen;
3348 if (end-start >= slen)
3349 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3350 else
3351 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003352}
3353
3354
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003355PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003356"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003357\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003358Encodes S using the codec registered for encoding. encoding defaults\n\
3359to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003360handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003361a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3362'xmlcharrefreplace' as well as any other name registered with\n\
3363codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003364
3365static PyObject *
3366string_encode(PyStringObject *self, PyObject *args)
3367{
3368 char *encoding = NULL;
3369 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003370 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003371
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003372 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3373 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003374 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003375 if (v == NULL)
3376 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003377 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3378 PyErr_Format(PyExc_TypeError,
3379 "encoder did not return a string/unicode object "
3380 "(type=%.400s)",
3381 v->ob_type->tp_name);
3382 Py_DECREF(v);
3383 return NULL;
3384 }
3385 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003386
3387 onError:
3388 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003389}
3390
3391
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003392PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003393"S.decode([encoding[,errors]]) -> object\n\
3394\n\
3395Decodes S using the codec registered for encoding. encoding defaults\n\
3396to the default encoding. errors may be given to set a different error\n\
3397handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003398a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3399as well as any other name registerd with codecs.register_error that is\n\
3400able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003401
3402static PyObject *
3403string_decode(PyStringObject *self, PyObject *args)
3404{
3405 char *encoding = NULL;
3406 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003407 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003408
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003409 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3410 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003411 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003412 if (v == NULL)
3413 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003414 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3415 PyErr_Format(PyExc_TypeError,
3416 "decoder did not return a string/unicode object "
3417 "(type=%.400s)",
3418 v->ob_type->tp_name);
3419 Py_DECREF(v);
3420 return NULL;
3421 }
3422 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003423
3424 onError:
3425 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003426}
3427
3428
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003429PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430"S.expandtabs([tabsize]) -> string\n\
3431\n\
3432Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003433If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003434
3435static PyObject*
3436string_expandtabs(PyStringObject *self, PyObject *args)
3437{
3438 const char *e, *p;
3439 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003440 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003441 PyObject *u;
3442 int tabsize = 8;
3443
3444 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3445 return NULL;
3446
Thomas Wouters7e474022000-07-16 12:04:32 +00003447 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003448 i = j = 0;
3449 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3450 for (p = PyString_AS_STRING(self); p < e; p++)
3451 if (*p == '\t') {
3452 if (tabsize > 0)
3453 j += tabsize - (j % tabsize);
3454 }
3455 else {
3456 j++;
3457 if (*p == '\n' || *p == '\r') {
3458 i += j;
3459 j = 0;
3460 }
3461 }
3462
3463 /* Second pass: create output string and fill it */
3464 u = PyString_FromStringAndSize(NULL, i + j);
3465 if (!u)
3466 return NULL;
3467
3468 j = 0;
3469 q = PyString_AS_STRING(u);
3470
3471 for (p = PyString_AS_STRING(self); p < e; p++)
3472 if (*p == '\t') {
3473 if (tabsize > 0) {
3474 i = tabsize - (j % tabsize);
3475 j += i;
3476 while (i--)
3477 *q++ = ' ';
3478 }
3479 }
3480 else {
3481 j++;
3482 *q++ = *p;
3483 if (*p == '\n' || *p == '\r')
3484 j = 0;
3485 }
3486
3487 return u;
3488}
3489
Tim Peters8fa5dd02001-09-12 02:18:30 +00003490static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003491pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003492{
3493 PyObject *u;
3494
3495 if (left < 0)
3496 left = 0;
3497 if (right < 0)
3498 right = 0;
3499
Tim Peters8fa5dd02001-09-12 02:18:30 +00003500 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501 Py_INCREF(self);
3502 return (PyObject *)self;
3503 }
3504
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003505 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003506 left + PyString_GET_SIZE(self) + right);
3507 if (u) {
3508 if (left)
3509 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003510 memcpy(PyString_AS_STRING(u) + left,
3511 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003512 PyString_GET_SIZE(self));
3513 if (right)
3514 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3515 fill, right);
3516 }
3517
3518 return u;
3519}
3520
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003521PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003522"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003523"\n"
3524"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003525"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003526
3527static PyObject *
3528string_ljust(PyStringObject *self, PyObject *args)
3529{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003530 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003531 char fillchar = ' ';
3532
Thomas Wouters4abb3662006-04-19 14:50:15 +00003533 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534 return NULL;
3535
Tim Peters8fa5dd02001-09-12 02:18:30 +00003536 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537 Py_INCREF(self);
3538 return (PyObject*) self;
3539 }
3540
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003541 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542}
3543
3544
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003545PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003546"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003547"\n"
3548"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003549"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003550
3551static PyObject *
3552string_rjust(PyStringObject *self, PyObject *args)
3553{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003554 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003555 char fillchar = ' ';
3556
Thomas Wouters4abb3662006-04-19 14:50:15 +00003557 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003558 return NULL;
3559
Tim Peters8fa5dd02001-09-12 02:18:30 +00003560 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561 Py_INCREF(self);
3562 return (PyObject*) self;
3563 }
3564
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003565 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566}
3567
3568
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003569PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003570"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003571"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003572"Return S centered in a string of length width. Padding is\n"
3573"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003574
3575static PyObject *
3576string_center(PyStringObject *self, PyObject *args)
3577{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003578 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003579 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003580 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003581
Thomas Wouters4abb3662006-04-19 14:50:15 +00003582 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003583 return NULL;
3584
Tim Peters8fa5dd02001-09-12 02:18:30 +00003585 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586 Py_INCREF(self);
3587 return (PyObject*) self;
3588 }
3589
3590 marg = width - PyString_GET_SIZE(self);
3591 left = marg / 2 + (marg & width & 1);
3592
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003593 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003594}
3595
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003596PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003597"S.zfill(width) -> string\n"
3598"\n"
3599"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003600"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003601
3602static PyObject *
3603string_zfill(PyStringObject *self, PyObject *args)
3604{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003605 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003606 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003607 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003608 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003609
Thomas Wouters4abb3662006-04-19 14:50:15 +00003610 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003611 return NULL;
3612
3613 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003614 if (PyString_CheckExact(self)) {
3615 Py_INCREF(self);
3616 return (PyObject*) self;
3617 }
3618 else
3619 return PyString_FromStringAndSize(
3620 PyString_AS_STRING(self),
3621 PyString_GET_SIZE(self)
3622 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003623 }
3624
3625 fill = width - PyString_GET_SIZE(self);
3626
3627 s = pad(self, fill, 0, '0');
3628
3629 if (s == NULL)
3630 return NULL;
3631
3632 p = PyString_AS_STRING(s);
3633 if (p[fill] == '+' || p[fill] == '-') {
3634 /* move sign to beginning of string */
3635 p[0] = p[fill];
3636 p[fill] = '0';
3637 }
3638
3639 return (PyObject*) s;
3640}
3641
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003642PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003643"S.isspace() -> bool\n\
3644\n\
3645Return True if all characters in S are whitespace\n\
3646and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003647
3648static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003649string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650{
Fred Drakeba096332000-07-09 07:04:36 +00003651 register const unsigned char *p
3652 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003653 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655 /* Shortcut for single character strings */
3656 if (PyString_GET_SIZE(self) == 1 &&
3657 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003658 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003659
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003660 /* Special case for empty strings */
3661 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003662 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003663
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664 e = p + PyString_GET_SIZE(self);
3665 for (; p < e; p++) {
3666 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003667 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003669 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003670}
3671
3672
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003673PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003674"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003675\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003676Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003677and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003678
3679static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003680string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003681{
Fred Drakeba096332000-07-09 07:04:36 +00003682 register const unsigned char *p
3683 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003684 register const unsigned char *e;
3685
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003686 /* Shortcut for single character strings */
3687 if (PyString_GET_SIZE(self) == 1 &&
3688 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003689 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003690
3691 /* Special case for empty strings */
3692 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003693 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003694
3695 e = p + PyString_GET_SIZE(self);
3696 for (; p < e; p++) {
3697 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003698 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003699 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003700 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003701}
3702
3703
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003704PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003705"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003706\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003707Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003708and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003709
3710static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003711string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003712{
Fred Drakeba096332000-07-09 07:04:36 +00003713 register const unsigned char *p
3714 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003715 register const unsigned char *e;
3716
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003717 /* Shortcut for single character strings */
3718 if (PyString_GET_SIZE(self) == 1 &&
3719 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003721
3722 /* Special case for empty strings */
3723 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003725
3726 e = p + PyString_GET_SIZE(self);
3727 for (; p < e; p++) {
3728 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003729 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003730 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003731 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003732}
3733
3734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003735PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003738Return True if all characters in S are digits\n\
3739and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740
3741static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003742string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743{
Fred Drakeba096332000-07-09 07:04:36 +00003744 register const unsigned char *p
3745 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003746 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 /* Shortcut for single character strings */
3749 if (PyString_GET_SIZE(self) == 1 &&
3750 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003751 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003753 /* Special case for empty strings */
3754 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003755 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003756
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757 e = p + PyString_GET_SIZE(self);
3758 for (; p < e; p++) {
3759 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003760 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003762 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763}
3764
3765
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003766PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003767"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003768\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003769Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003770at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771
3772static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003773string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774{
Fred Drakeba096332000-07-09 07:04:36 +00003775 register const unsigned char *p
3776 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003777 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778 int cased;
3779
Guido van Rossum4c08d552000-03-10 22:55:18 +00003780 /* Shortcut for single character strings */
3781 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003782 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003784 /* Special case for empty strings */
3785 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003786 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003787
Guido van Rossum4c08d552000-03-10 22:55:18 +00003788 e = p + PyString_GET_SIZE(self);
3789 cased = 0;
3790 for (; p < e; p++) {
3791 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003792 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003793 else if (!cased && islower(*p))
3794 cased = 1;
3795 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003796 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797}
3798
3799
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003800PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003801"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003802\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003803Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003804at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805
3806static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003807string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808{
Fred Drakeba096332000-07-09 07:04:36 +00003809 register const unsigned char *p
3810 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003811 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003812 int cased;
3813
Guido van Rossum4c08d552000-03-10 22:55:18 +00003814 /* Shortcut for single character strings */
3815 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003816 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003818 /* Special case for empty strings */
3819 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003820 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003821
Guido van Rossum4c08d552000-03-10 22:55:18 +00003822 e = p + PyString_GET_SIZE(self);
3823 cased = 0;
3824 for (; p < e; p++) {
3825 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003826 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003827 else if (!cased && isupper(*p))
3828 cased = 1;
3829 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003830 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003831}
3832
3833
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003834PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003835"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003837Return True if S is a titlecased string and there is at least one\n\
3838character in S, i.e. uppercase characters may only follow uncased\n\
3839characters and lowercase characters only cased ones. Return False\n\
3840otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003841
3842static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003843string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003844{
Fred Drakeba096332000-07-09 07:04:36 +00003845 register const unsigned char *p
3846 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003847 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003848 int cased, previous_is_cased;
3849
Guido van Rossum4c08d552000-03-10 22:55:18 +00003850 /* Shortcut for single character strings */
3851 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003852 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003853
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003854 /* Special case for empty strings */
3855 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003856 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003857
Guido van Rossum4c08d552000-03-10 22:55:18 +00003858 e = p + PyString_GET_SIZE(self);
3859 cased = 0;
3860 previous_is_cased = 0;
3861 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003862 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003863
3864 if (isupper(ch)) {
3865 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003866 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003867 previous_is_cased = 1;
3868 cased = 1;
3869 }
3870 else if (islower(ch)) {
3871 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003872 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003873 previous_is_cased = 1;
3874 cased = 1;
3875 }
3876 else
3877 previous_is_cased = 0;
3878 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003879 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003880}
3881
3882
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003883PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003884"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003885\n\
3886Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003887Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003888is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003889
/* Implementation of S.splitlines([keepends]).

   Scans the string's bytes for '\n', '\r' and "\r\n" line breaks and
   appends one list item per line via SPLIT_APPEND.  When `keepends' is
   non-zero the line break is included in the appended slice.

   Returns the new list, or NULL if argument parsing or list creation
   fails (and presumably also when SPLIT_APPEND jumps to onError).

   NOTE(review): SPLIT_APPEND is defined outside this block.  It
   presumably uses the locals `str' and `list' and jumps to `onError' on
   failure -- confirm against the macro before renaming any of these. */
static PyObject*
string_splitlines(PyStringObject *self, PyObject *args)
{
    register Py_ssize_t i;      /* scan position */
    register Py_ssize_t j;      /* start of the current line */
    Py_ssize_t len;
    int keepends = 0;
    PyObject *list;
    PyObject *str;              /* not referenced directly here; presumably
                                   used by SPLIT_APPEND -- verify */
    char *data;

    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
        return NULL;

    data = PyString_AS_STRING(self);
    len = PyString_GET_SIZE(self);

    list = PyList_New(0);
    if (!list)
        goto onError;

    for (i = j = 0; i < len; ) {
        Py_ssize_t eol;         /* end of the slice to append */

        /* Find a line and append it */
        while (i < len && data[i] != '\n' && data[i] != '\r')
            i++;

        /* Skip the line break reading CRLF as one line break */
        eol = i;
        if (i < len) {
            if (data[i] == '\r' && i + 1 < len &&
                data[i+1] == '\n')
                i += 2;
            else
                i++;
            /* With keepends the slice extends past the line break. */
            if (keepends)
                eol = i;
        }
        SPLIT_APPEND(data, j, eol);
        j = i;
    }
    /* NOTE(review): the loop above ends every iteration with j = i and
       only exits once i >= len, so this branch looks unreachable --
       confirm before removing. */
    if (j < len) {
        SPLIT_APPEND(data, j, len);
    }

    return list;

 onError:
    /* list may still be NULL here, hence XDECREF. */
    Py_XDECREF(list);
    return NULL;
}
3942
/* The split helper macros are not needed past this point; undefine them. */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003947
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003948static PyObject *
3949string_getnewargs(PyStringObject *v)
3950{
3951 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3952}
3953
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003954
/* Method table for the str type (installed as tp_methods of
   PyString_Type).  Each entry is {name, C function, calling-convention
   flags, docstring}; the table ends with a NULL sentinel. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    /* No docstring is supplied for __getnewargs__. */
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
4005
Jeremy Hylton938ace62002-07-17 16:30:39 +00004006static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00004007str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4008
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004009static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00004010string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004011{
Tim Peters6d6c1a32001-08-02 04:15:00 +00004012 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00004013 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00004014
Guido van Rossumae960af2001-08-30 03:11:59 +00004015 if (type != &PyString_Type)
4016 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00004017 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4018 return NULL;
4019 if (x == NULL)
4020 return PyString_FromString("");
4021 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004022}
4023
Guido van Rossumae960af2001-08-30 03:11:59 +00004024static PyObject *
4025str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4026{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004027 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004028 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004029
4030 assert(PyType_IsSubtype(type, &PyString_Type));
4031 tmp = string_new(&PyString_Type, args, kwds);
4032 if (tmp == NULL)
4033 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004034 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004035 n = PyString_GET_SIZE(tmp);
4036 pnew = type->tp_alloc(type, n);
4037 if (pnew != NULL) {
4038 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004039 ((PyStringObject *)pnew)->ob_shash =
4040 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004041 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004042 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004043 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004044 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004045}
4046
Guido van Rossumcacfc072002-05-24 19:01:59 +00004047static PyObject *
4048basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4049{
4050 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004051 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004052 return NULL;
4053}
4054
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004055static PyObject *
4056string_mod(PyObject *v, PyObject *w)
4057{
4058 if (!PyString_Check(v)) {
4059 Py_INCREF(Py_NotImplemented);
4060 return Py_NotImplemented;
4061 }
4062 return PyString_Format(v, w);
4063}
4064
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004065PyDoc_STRVAR(basestring_doc,
4066"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004067
/* Number protocol for str.  Only nb_remainder (the `%' operator) is
   provided; the slots listed before it are explicitly 0 and every slot
   after it is implicitly zero-initialized. */
static PyNumberMethods string_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    0,              /*nb_divide*/
    string_mod,     /*nb_remainder*/
};
4075
4076
/* Type object for `basestring'.  It supplies no behavior of its own
   apart from a tp_new that always raises TypeError; it allows
   subclassing (Py_TPFLAGS_BASETYPE) and derives from PyBaseObject_Type.
   The initializer is positional, so the slot order must not change. */
PyTypeObject PyBaseString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                      /* ob_size */
    "basestring",                           /* tp_name */
    0,                                      /* tp_basicsize */
    0,                                      /* tp_itemsize */
    0,                                      /* tp_dealloc */
    0,                                      /* tp_print */
    0,                                      /* tp_getattr */
    0,                                      /* tp_setattr */
    0,                                      /* tp_compare */
    0,                                      /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    0,                                      /* tp_hash */
    0,                                      /* tp_call */
    0,                                      /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                         /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                      /* tp_methods */
    0,                                      /* tp_members */
    0,                                      /* tp_getset */
    &PyBaseObject_Type,                     /* tp_base */
    0,                                      /* tp_dict */
    0,                                      /* tp_descr_get */
    0,                                      /* tp_descr_set */
    0,                                      /* tp_dictoffset */
    0,                                      /* tp_init */
    0,                                      /* tp_alloc */
    basestring_new,                         /* tp_new */
    0,                                      /* tp_free */
};
4119
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004120PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004121"str(object) -> string\n\
4122\n\
4123Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004124If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004125
/* Type object for `str'.  Instances are variable-sized: the basic size
   is sizeof(PyStringObject) and each item is one char.  The type derives
   from PyBaseString_Type and allows subclassing (Py_TPFLAGS_BASETYPE).
   The initializer is positional, so the slot order must not change. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                      /* ob_size */
    "str",                                  /* tp_name */
    sizeof(PyStringObject),                 /* tp_basicsize */
    sizeof(char),                           /* tp_itemsize */
    string_dealloc,                         /* tp_dealloc */
    (printfunc)string_print,                /* tp_print */
    0,                                      /* tp_getattr */
    0,                                      /* tp_setattr */
    0,                                      /* tp_compare */
    string_repr,                            /* tp_repr */
    &string_as_number,                      /* tp_as_number */
    &string_as_sequence,                    /* tp_as_sequence */
    &string_as_mapping,                     /* tp_as_mapping */
    (hashfunc)string_hash,                  /* tp_hash */
    0,                                      /* tp_call */
    string_str,                             /* tp_str */
    PyObject_GenericGetAttr,                /* tp_getattro */
    0,                                      /* tp_setattro */
    &string_as_buffer,                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE,                /* tp_flags */
    string_doc,                             /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    (richcmpfunc)string_richcompare,        /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    string_methods,                         /* tp_methods */
    0,                                      /* tp_members */
    0,                                      /* tp_getset */
    &PyBaseString_Type,                     /* tp_base */
    0,                                      /* tp_dict */
    0,                                      /* tp_descr_get */
    0,                                      /* tp_descr_set */
    0,                                      /* tp_dictoffset */
    0,                                      /* tp_init */
    0,                                      /* tp_alloc */
    string_new,                             /* tp_new */
    PyObject_Del,                           /* tp_free */
};
4169
4170void
Fred Drakeba096332000-07-09 07:04:36 +00004171PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004172{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004173 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004174 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004175 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004176 if (w == NULL || !PyString_Check(*pv)) {
4177 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004178 *pv = NULL;
4179 return;
4180 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004181 v = string_concat((PyStringObject *) *pv, w);
4182 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004183 *pv = v;
4184}
4185
Guido van Rossum013142a1994-08-30 08:19:36 +00004186void
Fred Drakeba096332000-07-09 07:04:36 +00004187PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004188{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004189 PyString_Concat(pv, w);
4190 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004191}
4192
4193
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004194/* The following function breaks the notion that strings are immutable:
4195 it changes the size of a string. We get away with this only if there
4196 is only one module referencing the object. You can also think of it
4197 as creating a new string object and destroying the old one, only
4198 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004199 already be known to some other part of the code...
4200 Note that if there's not enough memory to resize the string, the original
4201 string object at *pv is deallocated, *pv is set to NULL, an "out of
4202 memory" exception is set, and -1 is returned. Else (on success) 0 is
4203 returned, and the value in *pv may or may not be the same as on input.
4204 As always, an extra byte is allocated for a trailing \0 byte (newsize
4205 does *not* include that), and a trailing \0 byte is stored.
4206*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004207
4208int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004209_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004210{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004211 register PyObject *v;
4212 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004213 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004214 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4215 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004216 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004217 Py_DECREF(v);
4218 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004219 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004220 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004221 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004222 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004223 _Py_ForgetReference(v);
4224 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004225 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004226 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004227 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004228 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004229 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004230 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004231 _Py_NewReference(*pv);
4232 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004233 sv->ob_size = newsize;
4234 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004235 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004236 return 0;
4237}
Guido van Rossume5372401993-03-16 12:15:04 +00004238
4239/* Helpers for formatstring */
4240
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004241static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004242getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004243{
Thomas Wouters977485d2006-02-16 15:59:12 +00004244 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004245 if (argidx < arglen) {
4246 (*p_argidx)++;
4247 if (arglen < 0)
4248 return args;
4249 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004250 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004251 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004252 PyErr_SetString(PyExc_TypeError,
4253 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004254 return NULL;
4255}
4256
/* Format flag bits.  Each flag corresponds to one flag character of a
 * format specification:
 *      F_LJUST '-'
 *      F_SIGN  '+'
 *      F_BLANK ' '
 *      F_ALT   '#'
 *      F_ZERO  '0'
 * The values are distinct bits so they can be OR-ed into one `flags' int.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
4269
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004270static int
Fred Drakeba096332000-07-09 07:04:36 +00004271formatfloat(char *buf, size_t buflen, int flags,
4272 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004273{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004274 /* fmt = '%#.' + `prec` + `type`
4275 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004276 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004277 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004278 x = PyFloat_AsDouble(v);
4279 if (x == -1.0 && PyErr_Occurred()) {
4280 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004281 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004282 }
Guido van Rossume5372401993-03-16 12:15:04 +00004283 if (prec < 0)
4284 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004285 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4286 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004287 /* Worst case length calc to ensure no buffer overrun:
4288
4289 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004290 fmt = %#.<prec>g
4291 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004292 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004293 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004294
4295 'f' formats:
4296 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4297 len = 1 + 50 + 1 + prec = 52 + prec
4298
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004299 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004300 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004301
4302 */
4303 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4304 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004305 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004306 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004307 return -1;
4308 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004309 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4310 (flags&F_ALT) ? "#" : "",
4311 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004312 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004313 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004314}
4315
Tim Peters38fd5b62000-09-21 05:43:11 +00004316/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4317 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4318 * Python's regular ints.
4319 * Return value: a new PyString*, or NULL if error.
4320 * . *pbuf is set to point into it,
4321 * *plen set to the # of chars following that.
4322 * Caller must decref it when done using pbuf.
4323 * The string starting at *pbuf is of the form
4324 * "-"? ("0x" | "0X")? digit+
4325 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004326 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004327 * There will be at least prec digits, zero-filled on the left if
4328 * necessary to get that many.
4329 * val object to be converted
4330 * flags bitmask of format flags; only F_ALT is looked at
4331 * prec minimum number of digits; 0-fill on left if needed
4332 * type a character in [duoxX]; u acts the same as d
4333 *
4334 * CAUTION: o, x and X conversions on regular ints can never
4335 * produce a '-' sign, but can for Python's unbounded ints.
4336 */
4337PyObject*
4338_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4339 char **pbuf, int *plen)
4340{
4341 PyObject *result = NULL;
4342 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004343 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004344 int sign; /* 1 if '-', else 0 */
4345 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004346 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004347 int numdigits; /* len == numnondigits + numdigits */
4348 int numnondigits = 0;
4349
4350 switch (type) {
4351 case 'd':
4352 case 'u':
4353 result = val->ob_type->tp_str(val);
4354 break;
4355 case 'o':
4356 result = val->ob_type->tp_as_number->nb_oct(val);
4357 break;
4358 case 'x':
4359 case 'X':
4360 numnondigits = 2;
4361 result = val->ob_type->tp_as_number->nb_hex(val);
4362 break;
4363 default:
4364 assert(!"'type' not in [duoxX]");
4365 }
4366 if (!result)
4367 return NULL;
4368
4369 /* To modify the string in-place, there can only be one reference. */
4370 if (result->ob_refcnt != 1) {
4371 PyErr_BadInternalCall();
4372 return NULL;
4373 }
4374 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004375 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004376 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004377 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4378 return NULL;
4379 }
4380 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004381 if (buf[len-1] == 'L') {
4382 --len;
4383 buf[len] = '\0';
4384 }
4385 sign = buf[0] == '-';
4386 numnondigits += sign;
4387 numdigits = len - numnondigits;
4388 assert(numdigits > 0);
4389
Tim Petersfff53252001-04-12 18:38:48 +00004390 /* Get rid of base marker unless F_ALT */
4391 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004392 /* Need to skip 0x, 0X or 0. */
4393 int skipped = 0;
4394 switch (type) {
4395 case 'o':
4396 assert(buf[sign] == '0');
4397 /* If 0 is only digit, leave it alone. */
4398 if (numdigits > 1) {
4399 skipped = 1;
4400 --numdigits;
4401 }
4402 break;
4403 case 'x':
4404 case 'X':
4405 assert(buf[sign] == '0');
4406 assert(buf[sign + 1] == 'x');
4407 skipped = 2;
4408 numnondigits -= 2;
4409 break;
4410 }
4411 if (skipped) {
4412 buf += skipped;
4413 len -= skipped;
4414 if (sign)
4415 buf[0] = '-';
4416 }
4417 assert(len == numnondigits + numdigits);
4418 assert(numdigits > 0);
4419 }
4420
4421 /* Fill with leading zeroes to meet minimum width. */
4422 if (prec > numdigits) {
4423 PyObject *r1 = PyString_FromStringAndSize(NULL,
4424 numnondigits + prec);
4425 char *b1;
4426 if (!r1) {
4427 Py_DECREF(result);
4428 return NULL;
4429 }
4430 b1 = PyString_AS_STRING(r1);
4431 for (i = 0; i < numnondigits; ++i)
4432 *b1++ = *buf++;
4433 for (i = 0; i < prec - numdigits; i++)
4434 *b1++ = '0';
4435 for (i = 0; i < numdigits; i++)
4436 *b1++ = *buf++;
4437 *b1 = '\0';
4438 Py_DECREF(result);
4439 result = r1;
4440 buf = PyString_AS_STRING(result);
4441 len = numnondigits + prec;
4442 }
4443
4444 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004445 if (type == 'X') {
4446 /* Need to convert all lower case letters to upper case.
4447 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004448 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004449 if (buf[i] >= 'a' && buf[i] <= 'x')
4450 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004451 }
4452 *pbuf = buf;
4453 *plen = len;
4454 return result;
4455}
4456
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004457static int
Fred Drakeba096332000-07-09 07:04:36 +00004458formatint(char *buf, size_t buflen, int flags,
4459 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004460{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004461 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004462 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4463 + 1 + 1 = 24 */
4464 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004465 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004466 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004467
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004468 x = PyInt_AsLong(v);
4469 if (x == -1 && PyErr_Occurred()) {
4470 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004471 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004472 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004473 if (x < 0 && type == 'u') {
4474 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004475 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004476 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4477 sign = "-";
4478 else
4479 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004480 if (prec < 0)
4481 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004482
4483 if ((flags & F_ALT) &&
4484 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004485 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004486 * of issues that cause pain:
4487 * - when 0 is being converted, the C standard leaves off
4488 * the '0x' or '0X', which is inconsistent with other
4489 * %#x/%#X conversions and inconsistent with Python's
4490 * hex() function
4491 * - there are platforms that violate the standard and
4492 * convert 0 with the '0x' or '0X'
4493 * (Metrowerks, Compaq Tru64)
4494 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004495 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004496 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004497 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004498 * We can achieve the desired consistency by inserting our
4499 * own '0x' or '0X' prefix, and substituting %x/%X in place
4500 * of %#x/%#X.
4501 *
4502 * Note that this is the same approach as used in
4503 * formatint() in unicodeobject.c
4504 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004505 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4506 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004507 }
4508 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004509 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4510 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004511 prec, type);
4512 }
4513
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004514 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4515 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004516 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004517 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004518 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004519 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004520 return -1;
4521 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004522 if (sign[0])
4523 PyOS_snprintf(buf, buflen, fmt, -x);
4524 else
4525 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004526 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004527}
4528
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004529static int
Fred Drakeba096332000-07-09 07:04:36 +00004530formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004531{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004532 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 if (PyString_Check(v)) {
4534 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004535 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004536 }
4537 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004538 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004539 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004540 }
4541 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004542 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004543}
4544
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesised so the macro behaves as a single
   operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004554
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004555PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004556PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004557{
4558 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004559 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004560 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004561 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004562 PyObject *result, *orig_args;
4563#ifdef Py_USING_UNICODE
4564 PyObject *v, *w;
4565#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004566 PyObject *dict = NULL;
4567 if (format == NULL || !PyString_Check(format) || args == NULL) {
4568 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004569 return NULL;
4570 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004571 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004572 fmt = PyString_AS_STRING(format);
4573 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004574 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004575 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004576 if (result == NULL)
4577 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004578 res = PyString_AsString(result);
4579 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004580 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004581 argidx = 0;
4582 }
4583 else {
4584 arglen = -1;
4585 argidx = -2;
4586 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004587 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4588 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004589 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004590 while (--fmtcnt >= 0) {
4591 if (*fmt != '%') {
4592 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004593 rescnt = fmtcnt + 100;
4594 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004595 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004596 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004597 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004598 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004599 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004600 }
4601 *res++ = *fmt++;
4602 }
4603 else {
4604 /* Got a format specifier */
4605 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004606 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004607 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004608 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004609 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004610 PyObject *v = NULL;
4611 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004612 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004613 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004614 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004615 char formatbuf[FORMATBUFLEN];
4616 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004617#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004618 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004619 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004620#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004621
Guido van Rossumda9c2711996-12-05 21:58:58 +00004622 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004623 if (*fmt == '(') {
4624 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004625 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004626 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004627 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004628
4629 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004630 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004631 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004632 goto error;
4633 }
4634 ++fmt;
4635 --fmtcnt;
4636 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004637 /* Skip over balanced parentheses */
4638 while (pcount > 0 && --fmtcnt >= 0) {
4639 if (*fmt == ')')
4640 --pcount;
4641 else if (*fmt == '(')
4642 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004643 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004644 }
4645 keylen = fmt - keystart - 1;
4646 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004647 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004648 "incomplete format key");
4649 goto error;
4650 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004651 key = PyString_FromStringAndSize(keystart,
4652 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004653 if (key == NULL)
4654 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004655 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004656 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004657 args_owned = 0;
4658 }
4659 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004660 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004661 if (args == NULL) {
4662 goto error;
4663 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004664 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004665 arglen = -1;
4666 argidx = -2;
4667 }
Guido van Rossume5372401993-03-16 12:15:04 +00004668 while (--fmtcnt >= 0) {
4669 switch (c = *fmt++) {
4670 case '-': flags |= F_LJUST; continue;
4671 case '+': flags |= F_SIGN; continue;
4672 case ' ': flags |= F_BLANK; continue;
4673 case '#': flags |= F_ALT; continue;
4674 case '0': flags |= F_ZERO; continue;
4675 }
4676 break;
4677 }
4678 if (c == '*') {
4679 v = getnextarg(args, arglen, &argidx);
4680 if (v == NULL)
4681 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004682 if (!PyInt_Check(v)) {
4683 PyErr_SetString(PyExc_TypeError,
4684 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004685 goto error;
4686 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004687 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004688 if (width < 0) {
4689 flags |= F_LJUST;
4690 width = -width;
4691 }
Guido van Rossume5372401993-03-16 12:15:04 +00004692 if (--fmtcnt >= 0)
4693 c = *fmt++;
4694 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004695 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004696 width = c - '0';
4697 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004698 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004699 if (!isdigit(c))
4700 break;
4701 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004702 PyErr_SetString(
4703 PyExc_ValueError,
4704 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004705 goto error;
4706 }
4707 width = width*10 + (c - '0');
4708 }
4709 }
4710 if (c == '.') {
4711 prec = 0;
4712 if (--fmtcnt >= 0)
4713 c = *fmt++;
4714 if (c == '*') {
4715 v = getnextarg(args, arglen, &argidx);
4716 if (v == NULL)
4717 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004718 if (!PyInt_Check(v)) {
4719 PyErr_SetString(
4720 PyExc_TypeError,
4721 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004722 goto error;
4723 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004724 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004725 if (prec < 0)
4726 prec = 0;
4727 if (--fmtcnt >= 0)
4728 c = *fmt++;
4729 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004730 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004731 prec = c - '0';
4732 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004733 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004734 if (!isdigit(c))
4735 break;
4736 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004737 PyErr_SetString(
4738 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004739 "prec too big");
4740 goto error;
4741 }
4742 prec = prec*10 + (c - '0');
4743 }
4744 }
4745 } /* prec */
4746 if (fmtcnt >= 0) {
4747 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004748 if (--fmtcnt >= 0)
4749 c = *fmt++;
4750 }
4751 }
4752 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004753 PyErr_SetString(PyExc_ValueError,
4754 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004755 goto error;
4756 }
4757 if (c != '%') {
4758 v = getnextarg(args, arglen, &argidx);
4759 if (v == NULL)
4760 goto error;
4761 }
4762 sign = 0;
4763 fill = ' ';
4764 switch (c) {
4765 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004766 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004767 len = 1;
4768 break;
4769 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004770#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004771 if (PyUnicode_Check(v)) {
4772 fmt = fmt_start;
4773 argidx = argidx_start;
4774 goto unicode;
4775 }
Georg Brandld45014b2005-10-01 17:06:00 +00004776#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004777 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004778#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004779 if (temp != NULL && PyUnicode_Check(temp)) {
4780 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004781 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004782 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004783 goto unicode;
4784 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004785#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004786 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004787 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004788 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004789 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004790 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004791 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004792 if (!PyString_Check(temp)) {
4793 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004794 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004795 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004796 goto error;
4797 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004798 pbuf = PyString_AS_STRING(temp);
4799 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004800 if (prec >= 0 && len > prec)
4801 len = prec;
4802 break;
4803 case 'i':
4804 case 'd':
4805 case 'u':
4806 case 'o':
4807 case 'x':
4808 case 'X':
4809 if (c == 'i')
4810 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004811 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004812 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004813 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004814 prec, c, &pbuf, &ilen);
4815 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004816 if (!temp)
4817 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004818 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004819 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004820 else {
4821 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004822 len = formatint(pbuf,
4823 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004824 flags, prec, c, v);
4825 if (len < 0)
4826 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004827 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004828 }
4829 if (flags & F_ZERO)
4830 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004831 break;
4832 case 'e':
4833 case 'E':
4834 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004835 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004836 case 'g':
4837 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004838 if (c == 'F')
4839 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004840 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004841 len = formatfloat(pbuf, sizeof(formatbuf),
4842 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004843 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004844 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004845 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004846 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004847 fill = '0';
4848 break;
4849 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004850#ifdef Py_USING_UNICODE
4851 if (PyUnicode_Check(v)) {
4852 fmt = fmt_start;
4853 argidx = argidx_start;
4854 goto unicode;
4855 }
4856#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004857 pbuf = formatbuf;
4858 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004859 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004860 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004861 break;
4862 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004863 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004864 "unsupported format character '%c' (0x%x) "
4865 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004866 c, c,
4867 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004868 goto error;
4869 }
4870 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004871 if (*pbuf == '-' || *pbuf == '+') {
4872 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004873 len--;
4874 }
4875 else if (flags & F_SIGN)
4876 sign = '+';
4877 else if (flags & F_BLANK)
4878 sign = ' ';
4879 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004880 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004881 }
4882 if (width < len)
4883 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004884 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004885 reslen -= rescnt;
4886 rescnt = width + fmtcnt + 100;
4887 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004888 if (reslen < 0) {
4889 Py_DECREF(result);
4890 return PyErr_NoMemory();
4891 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004892 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004893 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004894 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004895 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004896 }
4897 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004898 if (fill != ' ')
4899 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004900 rescnt--;
4901 if (width > len)
4902 width--;
4903 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004904 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4905 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004906 assert(pbuf[1] == c);
4907 if (fill != ' ') {
4908 *res++ = *pbuf++;
4909 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004910 }
Tim Petersfff53252001-04-12 18:38:48 +00004911 rescnt -= 2;
4912 width -= 2;
4913 if (width < 0)
4914 width = 0;
4915 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004916 }
4917 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004918 do {
4919 --rescnt;
4920 *res++ = fill;
4921 } while (--width > len);
4922 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004923 if (fill == ' ') {
4924 if (sign)
4925 *res++ = sign;
4926 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004927 (c == 'x' || c == 'X')) {
4928 assert(pbuf[0] == '0');
4929 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004930 *res++ = *pbuf++;
4931 *res++ = *pbuf++;
4932 }
4933 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004934 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004935 res += len;
4936 rescnt -= len;
4937 while (--width >= len) {
4938 --rescnt;
4939 *res++ = ' ';
4940 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004941 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004942 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004943 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004944 goto error;
4945 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004946 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004947 } /* '%' */
4948 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004949 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004950 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004951 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004952 goto error;
4953 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004954 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004955 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004956 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004957 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004958 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004959
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004960#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004961 unicode:
4962 if (args_owned) {
4963 Py_DECREF(args);
4964 args_owned = 0;
4965 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004966 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004967 if (PyTuple_Check(orig_args) && argidx > 0) {
4968 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004969 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004970 v = PyTuple_New(n);
4971 if (v == NULL)
4972 goto error;
4973 while (--n >= 0) {
4974 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4975 Py_INCREF(w);
4976 PyTuple_SET_ITEM(v, n, w);
4977 }
4978 args = v;
4979 } else {
4980 Py_INCREF(orig_args);
4981 args = orig_args;
4982 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004983 args_owned = 1;
4984 /* Take what we have of the result and let the Unicode formatting
4985 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004986 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004987 if (_PyString_Resize(&result, rescnt))
4988 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004989 fmtcnt = PyString_GET_SIZE(format) - \
4990 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004991 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4992 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004993 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004994 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004995 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004996 if (v == NULL)
4997 goto error;
4998 /* Paste what we have (result) to what the Unicode formatting
4999 function returned (v) and return the result (or error) */
5000 w = PyUnicode_Concat(result, v);
5001 Py_DECREF(result);
5002 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00005003 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00005004 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00005005#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00005006
Guido van Rossume5372401993-03-16 12:15:04 +00005007 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005008 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005009 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005010 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00005011 }
Guido van Rossume5372401993-03-16 12:15:04 +00005012 return NULL;
5013}
Guido van Rossum2a61e741997-01-18 07:55:05 +00005014
Guido van Rossum2a61e741997-01-18 07:55:05 +00005015void
Fred Drakeba096332000-07-09 07:04:36 +00005016PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005017{
5018 register PyStringObject *s = (PyStringObject *)(*p);
5019 PyObject *t;
5020 if (s == NULL || !PyString_Check(s))
5021 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005022 /* If it's a string subclass, we don't really know what putting
5023 it in the interned dict might do. */
5024 if (!PyString_CheckExact(s))
5025 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005026 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005027 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005028 if (interned == NULL) {
5029 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005030 if (interned == NULL) {
5031 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005032 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005033 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005034 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005035 t = PyDict_GetItem(interned, (PyObject *)s);
5036 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005037 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005038 Py_DECREF(*p);
5039 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005040 return;
5041 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005042
Armin Rigo79f7ad22004-08-07 19:27:39 +00005043 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005044 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005045 return;
5046 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005047 /* The two references in interned are not counted by refcnt.
5048 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00005049 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005050 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005051}
5052
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005053void
5054PyString_InternImmortal(PyObject **p)
5055{
5056 PyString_InternInPlace(p);
5057 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5058 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5059 Py_INCREF(*p);
5060 }
5061}
5062
Guido van Rossum2a61e741997-01-18 07:55:05 +00005063
5064PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005065PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005066{
5067 PyObject *s = PyString_FromString(cp);
5068 if (s == NULL)
5069 return NULL;
5070 PyString_InternInPlace(&s);
5071 return s;
5072}
5073
Guido van Rossum8cf04761997-08-02 02:57:45 +00005074void
Fred Drakeba096332000-07-09 07:04:36 +00005075PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005076{
5077 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005078 for (i = 0; i < UCHAR_MAX + 1; i++) {
5079 Py_XDECREF(characters[i]);
5080 characters[i] = NULL;
5081 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005082 Py_XDECREF(nullstring);
5083 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005084}
Barry Warsawa903ad982001-02-23 16:40:48 +00005085
Barry Warsawa903ad982001-02-23 16:40:48 +00005086void _Py_ReleaseInternedStrings(void)
5087{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005088 PyObject *keys;
5089 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005090 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005091
5092 if (interned == NULL || !PyDict_Check(interned))
5093 return;
5094 keys = PyDict_Keys(interned);
5095 if (keys == NULL || !PyList_Check(keys)) {
5096 PyErr_Clear();
5097 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005098 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005099
5100 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5101 detector, interned strings are not forcibly deallocated; rather, we
5102 give them their stolen references back, and then clear and DECREF
5103 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005104
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005105 fprintf(stderr, "releasing interned strings\n");
5106 n = PyList_GET_SIZE(keys);
5107 for (i = 0; i < n; i++) {
5108 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5109 switch (s->ob_sstate) {
5110 case SSTATE_NOT_INTERNED:
5111 /* XXX Shouldn't happen */
5112 break;
5113 case SSTATE_INTERNED_IMMORTAL:
5114 s->ob_refcnt += 1;
5115 break;
5116 case SSTATE_INTERNED_MORTAL:
5117 s->ob_refcnt += 2;
5118 break;
5119 default:
5120 Py_FatalError("Inconsistent interned string state.");
5121 }
5122 s->ob_sstate = SSTATE_NOT_INTERNED;
5123 }
5124 Py_DECREF(keys);
5125 PyDict_Clear(interned);
5126 Py_DECREF(interned);
5127 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005128}