blob: 7fade569c3ef8d2eb8d96480655b2852c67a7d90 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
   For PyString_FromStringAndSize(), the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000072 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000073 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000074 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000076 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000078 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (str != NULL)
80 memcpy(op->ob_sval, str, size);
81 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000082 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000090 PyObject *t = (PyObject *)op;
91 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000092 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097}
98
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000100PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000101{
Tim Peters62de65b2001-12-06 20:29:32 +0000102 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000103 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000104
105 assert(str != NULL);
106 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000107 if (size > INT_MAX) {
108 PyErr_SetString(PyExc_OverflowError,
109 "string is too long for a Python string");
110 return NULL;
111 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0 && (op = nullstring) != NULL) {
113#ifdef COUNT_ALLOCS
114 null_strings++;
115#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000116 Py_INCREF(op);
117 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
119 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
120#ifdef COUNT_ALLOCS
121 one_strings++;
122#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000127 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000128 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000133 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000134 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000135 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000137 PyObject *t = (PyObject *)op;
138 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000139 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Barry Warsawdadace02001-08-24 18:32:06 +0000152PyObject *
153PyString_FromFormatV(const char *format, va_list vargs)
154{
Tim Petersc15c4f12001-10-02 21:32:07 +0000155 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 int n = 0;
157 const char* f;
158 char *s;
159 PyObject* string;
160
Tim Petersc15c4f12001-10-02 21:32:07 +0000161#ifdef VA_LIST_IS_ARRAY
162 memcpy(count, vargs, sizeof(va_list));
163#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000164#ifdef __va_copy
165 __va_copy(count, vargs);
166#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000167 count = vargs;
168#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000169#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000170 /* step 1: figure out how large a buffer we need */
171 for (f = format; *f; f++) {
172 if (*f == '%') {
173 const char* p = f;
174 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
175 ;
176
177 /* skip the 'l' in %ld, since it doesn't change the
178 width. although only %d is supported (see
179 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000180 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000181 if (*f == 'l' && *(f+1) == 'd')
182 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000183
Barry Warsawdadace02001-08-24 18:32:06 +0000184 switch (*f) {
185 case 'c':
186 (void)va_arg(count, int);
187 /* fall through... */
188 case '%':
189 n++;
190 break;
191 case 'd': case 'i': case 'x':
192 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000193 /* 20 bytes is enough to hold a 64-bit
194 integer. Decimal takes the most space.
195 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000196 n += 20;
197 break;
198 case 's':
199 s = va_arg(count, char*);
200 n += strlen(s);
201 break;
202 case 'p':
203 (void) va_arg(count, int);
204 /* maximum 64-bit pointer representation:
205 * 0xffffffffffffffff
206 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000217 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000226 /* Since we've analyzed how much space we need for the worst case,
227 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000228 string = PyString_FromStringAndSize(NULL, n);
229 if (!string)
230 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231
Barry Warsawdadace02001-08-24 18:32:06 +0000232 s = PyString_AsString(string);
233
234 for (f = format; *f; f++) {
235 if (*f == '%') {
236 const char* p = f++;
237 int i, longflag = 0;
238 /* parse the width.precision part (we're only
239 interested in the precision value, if any) */
240 n = 0;
241 while (isdigit(Py_CHARMASK(*f)))
242 n = (n*10) + *f++ - '0';
243 if (*f == '.') {
244 f++;
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 }
249 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
250 f++;
251 /* handle the long flag, but only for %ld. others
252 can be added when necessary. */
253 if (*f == 'l' && *(f+1) == 'd') {
254 longflag = 1;
255 ++f;
256 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000257
Barry Warsawdadace02001-08-24 18:32:06 +0000258 switch (*f) {
259 case 'c':
260 *s++ = va_arg(vargs, int);
261 break;
262 case 'd':
263 if (longflag)
264 sprintf(s, "%ld", va_arg(vargs, long));
265 else
266 sprintf(s, "%d", va_arg(vargs, int));
267 s += strlen(s);
268 break;
269 case 'i':
270 sprintf(s, "%i", va_arg(vargs, int));
271 s += strlen(s);
272 break;
273 case 'x':
274 sprintf(s, "%x", va_arg(vargs, int));
275 s += strlen(s);
276 break;
277 case 's':
278 p = va_arg(vargs, char*);
279 i = strlen(p);
280 if (n > 0 && i > n)
281 i = n;
282 memcpy(s, p, i);
283 s += i;
284 break;
285 case 'p':
286 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000287 /* %p is ill-defined: ensure leading 0x. */
288 if (s[1] == 'X')
289 s[1] = 'x';
290 else if (s[1] != 'x') {
291 memmove(s+2, s, strlen(s)+1);
292 s[0] = '0';
293 s[1] = 'x';
294 }
Barry Warsawdadace02001-08-24 18:32:06 +0000295 s += strlen(s);
296 break;
297 case '%':
298 *s++ = '%';
299 break;
300 default:
301 strcpy(s, p);
302 s += strlen(s);
303 goto end;
304 }
305 } else
306 *s++ = *f;
307 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000308
Barry Warsawdadace02001-08-24 18:32:06 +0000309 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000311 return string;
312}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000313
Barry Warsawdadace02001-08-24 18:32:06 +0000314PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000315PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000316{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000317 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000318 va_list vargs;
319
320#ifdef HAVE_STDARG_PROTOTYPES
321 va_start(vargs, format);
322#else
323 va_start(vargs);
324#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000325 ret = PyString_FromFormatV(format, vargs);
326 va_end(vargs);
327 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000328}
329
330
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000331PyObject *PyString_Decode(const char *s,
332 int size,
333 const char *encoding,
334 const char *errors)
335{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000336 PyObject *v, *str;
337
338 str = PyString_FromStringAndSize(s, size);
339 if (str == NULL)
340 return NULL;
341 v = PyString_AsDecodedString(str, encoding, errors);
342 Py_DECREF(str);
343 return v;
344}
345
346PyObject *PyString_AsDecodedObject(PyObject *str,
347 const char *encoding,
348 const char *errors)
349{
350 PyObject *v;
351
352 if (!PyString_Check(str)) {
353 PyErr_BadArgument();
354 goto onError;
355 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000356
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000357 if (encoding == NULL) {
358#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000360#else
361 PyErr_SetString(PyExc_ValueError, "no encoding specified");
362 goto onError;
363#endif
364 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365
366 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000367 v = PyCodec_Decode(str, encoding, errors);
368 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000370
371 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000372
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000373 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000374 return NULL;
375}
376
377PyObject *PyString_AsDecodedString(PyObject *str,
378 const char *encoding,
379 const char *errors)
380{
381 PyObject *v;
382
383 v = PyString_AsDecodedObject(str, encoding, errors);
384 if (v == NULL)
385 goto onError;
386
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 /* Convert Unicode to a string using the default encoding */
389 if (PyUnicode_Check(v)) {
390 PyObject *temp = v;
391 v = PyUnicode_AsEncodedString(v, NULL, NULL);
392 Py_DECREF(temp);
393 if (v == NULL)
394 goto onError;
395 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 if (!PyString_Check(v)) {
398 PyErr_Format(PyExc_TypeError,
399 "decoder did not return a string object (type=%.400s)",
400 v->ob_type->tp_name);
401 Py_DECREF(v);
402 goto onError;
403 }
404
405 return v;
406
407 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000408 return NULL;
409}
410
411PyObject *PyString_Encode(const char *s,
412 int size,
413 const char *encoding,
414 const char *errors)
415{
416 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000417
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 str = PyString_FromStringAndSize(s, size);
419 if (str == NULL)
420 return NULL;
421 v = PyString_AsEncodedString(str, encoding, errors);
422 Py_DECREF(str);
423 return v;
424}
425
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000426PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 const char *encoding,
428 const char *errors)
429{
430 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000431
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 if (!PyString_Check(str)) {
433 PyErr_BadArgument();
434 goto onError;
435 }
436
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000437 if (encoding == NULL) {
438#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000439 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000440#else
441 PyErr_SetString(PyExc_ValueError, "no encoding specified");
442 goto onError;
443#endif
444 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445
446 /* Encode via the codec registry */
447 v = PyCodec_Encode(str, encoding, errors);
448 if (v == NULL)
449 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450
451 return v;
452
453 onError:
454 return NULL;
455}
456
457PyObject *PyString_AsEncodedString(PyObject *str,
458 const char *encoding,
459 const char *errors)
460{
461 PyObject *v;
462
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000463 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000464 if (v == NULL)
465 goto onError;
466
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 /* Convert Unicode to a string using the default encoding */
469 if (PyUnicode_Check(v)) {
470 PyObject *temp = v;
471 v = PyUnicode_AsEncodedString(v, NULL, NULL);
472 Py_DECREF(temp);
473 if (v == NULL)
474 goto onError;
475 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000477 if (!PyString_Check(v)) {
478 PyErr_Format(PyExc_TypeError,
479 "encoder did not return a string object (type=%.400s)",
480 v->ob_type->tp_name);
481 Py_DECREF(v);
482 goto onError;
483 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000484
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000485 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 onError:
488 return NULL;
489}
490
Guido van Rossum234f9421993-06-17 12:35:49 +0000491static void
Fred Drakeba096332000-07-09 07:04:36 +0000492string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000493{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000494 switch (PyString_CHECK_INTERNED(op)) {
495 case SSTATE_NOT_INTERNED:
496 break;
497
498 case SSTATE_INTERNED_MORTAL:
499 /* revive dead object temporarily for DelItem */
500 op->ob_refcnt = 3;
501 if (PyDict_DelItem(interned, op) != 0)
502 Py_FatalError(
503 "deletion of interned string failed");
504 break;
505
506 case SSTATE_INTERNED_IMMORTAL:
507 Py_FatalError("Immortal interned string died.");
508
509 default:
510 Py_FatalError("Inconsistent interned string state.");
511 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000512 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000513}
514
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000515/* Unescape a backslash-escaped string. If unicode is non-zero,
516 the string is a u-literal. If recode_encoding is non-zero,
517 the string is UTF-8 encoded and should be re-encoded in the
518 specified encoding. */
519
520PyObject *PyString_DecodeEscape(const char *s,
521 int len,
522 const char *errors,
523 int unicode,
524 const char *recode_encoding)
525{
526 int c;
527 char *p, *buf;
528 const char *end;
529 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000530 int newlen = recode_encoding ? 4*len:len;
531 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 if (v == NULL)
533 return NULL;
534 p = buf = PyString_AsString(v);
535 end = s + len;
536 while (s < end) {
537 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000538 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539#ifdef Py_USING_UNICODE
540 if (recode_encoding && (*s & 0x80)) {
541 PyObject *u, *w;
542 char *r;
543 const char* t;
544 int rn;
545 t = s;
546 /* Decode non-ASCII bytes as UTF-8. */
547 while (t < end && (*t & 0x80)) t++;
548 u = PyUnicode_DecodeUTF8(s, t - s, errors);
549 if(!u) goto failed;
550
551 /* Recode them in target encoding. */
552 w = PyUnicode_AsEncodedString(
553 u, recode_encoding, errors);
554 Py_DECREF(u);
555 if (!w) goto failed;
556
557 /* Append bytes to output buffer. */
558 r = PyString_AsString(w);
559 rn = PyString_Size(w);
560 memcpy(p, r, rn);
561 p += rn;
562 Py_DECREF(w);
563 s = t;
564 } else {
565 *p++ = *s++;
566 }
567#else
568 *p++ = *s++;
569#endif
570 continue;
571 }
572 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000573 if (s==end) {
574 PyErr_SetString(PyExc_ValueError,
575 "Trailing \\ in string");
576 goto failed;
577 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000578 switch (*s++) {
579 /* XXX This assumes ASCII! */
580 case '\n': break;
581 case '\\': *p++ = '\\'; break;
582 case '\'': *p++ = '\''; break;
583 case '\"': *p++ = '\"'; break;
584 case 'b': *p++ = '\b'; break;
585 case 'f': *p++ = '\014'; break; /* FF */
586 case 't': *p++ = '\t'; break;
587 case 'n': *p++ = '\n'; break;
588 case 'r': *p++ = '\r'; break;
589 case 'v': *p++ = '\013'; break; /* VT */
590 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
591 case '0': case '1': case '2': case '3':
592 case '4': case '5': case '6': case '7':
593 c = s[-1] - '0';
594 if ('0' <= *s && *s <= '7') {
595 c = (c<<3) + *s++ - '0';
596 if ('0' <= *s && *s <= '7')
597 c = (c<<3) + *s++ - '0';
598 }
599 *p++ = c;
600 break;
601 case 'x':
602 if (isxdigit(Py_CHARMASK(s[0]))
603 && isxdigit(Py_CHARMASK(s[1]))) {
604 unsigned int x = 0;
605 c = Py_CHARMASK(*s);
606 s++;
607 if (isdigit(c))
608 x = c - '0';
609 else if (islower(c))
610 x = 10 + c - 'a';
611 else
612 x = 10 + c - 'A';
613 x = x << 4;
614 c = Py_CHARMASK(*s);
615 s++;
616 if (isdigit(c))
617 x += c - '0';
618 else if (islower(c))
619 x += 10 + c - 'a';
620 else
621 x += 10 + c - 'A';
622 *p++ = x;
623 break;
624 }
625 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 PyErr_SetString(PyExc_ValueError,
627 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000628 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 }
630 if (strcmp(errors, "replace") == 0) {
631 *p++ = '?';
632 } else if (strcmp(errors, "ignore") == 0)
633 /* do nothing */;
634 else {
635 PyErr_Format(PyExc_ValueError,
636 "decoding error; "
637 "unknown error handling code: %.400s",
638 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000639 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 }
641#ifndef Py_USING_UNICODE
642 case 'u':
643 case 'U':
644 case 'N':
645 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000646 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 "Unicode escapes not legal "
648 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000649 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000650 }
651#endif
652 default:
653 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000654 s--;
655 goto non_esc; /* an arbitry number of unescaped
656 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 }
658 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000659 if (p-buf < newlen)
660 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000661 return v;
662 failed:
663 Py_DECREF(v);
664 return NULL;
665}
666
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667static int
668string_getsize(register PyObject *op)
669{
670 char *s;
671 int len;
672 if (PyString_AsStringAndSize(op, &s, &len))
673 return -1;
674 return len;
675}
676
677static /*const*/ char *
678string_getbuffer(register PyObject *op)
679{
680 char *s;
681 int len;
682 if (PyString_AsStringAndSize(op, &s, &len))
683 return NULL;
684 return s;
685}
686
Guido van Rossumd7047b31995-01-02 19:07:15 +0000687int
Fred Drakeba096332000-07-09 07:04:36 +0000688PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000690 if (!PyString_Check(op))
691 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693}
694
695/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000696PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (!PyString_Check(op))
699 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000700 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701}
702
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000703int
704PyString_AsStringAndSize(register PyObject *obj,
705 register char **s,
706 register int *len)
707{
708 if (s == NULL) {
709 PyErr_BadInternalCall();
710 return -1;
711 }
712
713 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000714#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715 if (PyUnicode_Check(obj)) {
716 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
717 if (obj == NULL)
718 return -1;
719 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000720 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000721#endif
722 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723 PyErr_Format(PyExc_TypeError,
724 "expected string or Unicode object, "
725 "%.200s found", obj->ob_type->tp_name);
726 return -1;
727 }
728 }
729
730 *s = PyString_AS_STRING(obj);
731 if (len != NULL)
732 *len = PyString_GET_SIZE(obj);
733 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
734 PyErr_SetString(PyExc_TypeError,
735 "expected string without null bytes");
736 return -1;
737 }
738 return 0;
739}
740
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741/* Methods */
742
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000743static int
Fred Drakeba096332000-07-09 07:04:36 +0000744string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745{
746 int i;
747 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000748 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000749
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000750 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000751 if (! PyString_CheckExact(op)) {
752 int ret;
753 /* A str subclass may have its own __str__ method. */
754 op = (PyStringObject *) PyObject_Str((PyObject *)op);
755 if (op == NULL)
756 return -1;
757 ret = string_print(op, fp, flags);
758 Py_DECREF(op);
759 return ret;
760 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000761 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000762#ifdef __VMS
763 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
764#else
765 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
766#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000767 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000769
Thomas Wouters7e474022000-07-16 12:04:32 +0000770 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000772 if (memchr(op->ob_sval, '\'', op->ob_size) &&
773 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000774 quote = '"';
775
776 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 for (i = 0; i < op->ob_size; i++) {
778 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000779 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000780 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000781 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\r");
787 else if (c < ' ' || c >= 0x7f)
788 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000789 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794}
795
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000796PyObject *
797PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000799 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000800 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000801 PyObject *v;
802 if (newsize > INT_MAX) {
803 PyErr_SetString(PyExc_OverflowError,
804 "string is too large to make repr");
805 }
806 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000808 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 }
810 else {
811 register int i;
812 register char c;
813 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 int quote;
815
Thomas Wouters7e474022000-07-16 12:04:32 +0000816 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000818 if (smartquotes &&
819 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000820 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000821 quote = '"';
822
Tim Peters9161c8b2001-12-03 01:55:38 +0000823 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000826 /* There's at least enough room for a hex escape
827 and a closing quote. */
828 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000832 else if (c == '\t')
833 *p++ = '\\', *p++ = 't';
834 else if (c == '\n')
835 *p++ = '\\', *p++ = 'n';
836 else if (c == '\r')
837 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000838 else if (c < ' ' || c >= 0x7f) {
839 /* For performance, we don't want to call
840 PyOS_snprintf here (extra layers of
841 function call). */
842 sprintf(p, "\\x%02x", c & 0xff);
843 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000844 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 else
846 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000848 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000851 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000852 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000853 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855}
856
Guido van Rossum189f1df2001-05-01 16:51:53 +0000857static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858string_repr(PyObject *op)
859{
860 return PyString_Repr(op, 1);
861}
862
863static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000864string_str(PyObject *s)
865{
Tim Petersc9933152001-10-16 20:18:24 +0000866 assert(PyString_Check(s));
867 if (PyString_CheckExact(s)) {
868 Py_INCREF(s);
869 return s;
870 }
871 else {
872 /* Subtype -- return genuine string with the same value. */
873 PyStringObject *t = (PyStringObject *) s;
874 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
875 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000876}
877
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878static int
Fred Drakeba096332000-07-09 07:04:36 +0000879string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880{
881 return a->ob_size;
882}
883
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000884static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000885string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000888 register PyStringObject *op;
889 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000890#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 if (PyUnicode_Check(bb))
892 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000893#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000894 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000895 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000896 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 return NULL;
898 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000899#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000901 if ((a->ob_size == 0 || b->ob_size == 0) &&
902 PyString_CheckExact(a) && PyString_CheckExact(b)) {
903 if (a->ob_size == 0) {
904 Py_INCREF(bb);
905 return bb;
906 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 Py_INCREF(a);
908 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
910 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000911 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000912 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000915 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000917 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
919 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
920 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922#undef b
923}
924
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000926string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000929 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000930 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000932 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933 if (n < 0)
934 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000935 /* watch out for overflows: the size can overflow int,
936 * and the # of bytes needed can overflow size_t
937 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000938 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000939 if (n && size / n != a->ob_size) {
940 PyErr_SetString(PyExc_OverflowError,
941 "repeated string is too long");
942 return NULL;
943 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000944 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 Py_INCREF(a);
946 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947 }
Tim Peterse7c05322004-06-27 17:24:49 +0000948 nbytes = (size_t)size;
949 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000950 PyErr_SetString(PyExc_OverflowError,
951 "repeated string is too long");
952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000955 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000961 op->ob_sval[size] = '\0';
962 if (a->ob_size == 1 && n > 0) {
963 memset(op->ob_sval, a->ob_sval[0] , n);
964 return (PyObject *) op;
965 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000966 i = 0;
967 if (i < size) {
968 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
969 i = (int) a->ob_size;
970 }
971 while (i < size) {
972 j = (i <= size-i) ? i : size-i;
973 memcpy(op->ob_sval+i, op->ob_sval, j);
974 i += j;
975 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977}
978
979/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
980
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000982string_slice(register PyStringObject *a, register int i, register int j)
983 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984{
985 if (i < 0)
986 i = 0;
987 if (j < 0)
988 j = 0; /* Avoid signed/unsigned bug in next line */
989 if (j > a->ob_size)
990 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000991 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
992 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 Py_INCREF(a);
994 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000995 }
996 if (j < i)
997 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999}
1000
Guido van Rossum9284a572000-03-07 15:53:43 +00001001static int
Fred Drakeba096332000-07-09 07:04:36 +00001002string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001003{
Barry Warsaw817918c2002-08-06 16:58:21 +00001004 const char *lhs, *rhs, *end;
1005 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001006
1007 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001008#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001009 if (PyUnicode_Check(el))
1010 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001011#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001012 if (!PyString_Check(el)) {
1013 PyErr_SetString(PyExc_TypeError,
1014 "'in <string>' requires string as left operand");
1015 return -1;
1016 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001017 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001018 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001019 rhs = PyString_AS_STRING(el);
1020 lhs = PyString_AS_STRING(a);
1021
1022 /* optimize for a single character */
1023 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001024 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001025
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 while (lhs <= end) {
1028 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001029 return 1;
1030 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001031
Guido van Rossum9284a572000-03-07 15:53:43 +00001032 return 0;
1033}
1034
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001036string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001037{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001039 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001041 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042 return NULL;
1043 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001044 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001045 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001046 if (v == NULL)
1047 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001048 else {
1049#ifdef COUNT_ALLOCS
1050 one_strings++;
1051#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001052 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001053 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001054 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001055}
1056
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057static PyObject*
1058string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001060 int c;
1061 int len_a, len_b;
1062 int min_len;
1063 PyObject *result;
1064
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001065 /* Make sure both arguments are strings. */
1066 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001067 result = Py_NotImplemented;
1068 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001069 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001070 if (a == b) {
1071 switch (op) {
1072 case Py_EQ:case Py_LE:case Py_GE:
1073 result = Py_True;
1074 goto out;
1075 case Py_NE:case Py_LT:case Py_GT:
1076 result = Py_False;
1077 goto out;
1078 }
1079 }
1080 if (op == Py_EQ) {
1081 /* Supporting Py_NE here as well does not save
1082 much time, since Py_NE is rarely used. */
1083 if (a->ob_size == b->ob_size
1084 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001085 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001086 a->ob_size) == 0)) {
1087 result = Py_True;
1088 } else {
1089 result = Py_False;
1090 }
1091 goto out;
1092 }
1093 len_a = a->ob_size; len_b = b->ob_size;
1094 min_len = (len_a < len_b) ? len_a : len_b;
1095 if (min_len > 0) {
1096 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1097 if (c==0)
1098 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1099 }else
1100 c = 0;
1101 if (c == 0)
1102 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1103 switch (op) {
1104 case Py_LT: c = c < 0; break;
1105 case Py_LE: c = c <= 0; break;
1106 case Py_EQ: assert(0); break; /* unreachable */
1107 case Py_NE: c = c != 0; break;
1108 case Py_GT: c = c > 0; break;
1109 case Py_GE: c = c >= 0; break;
1110 default:
1111 result = Py_NotImplemented;
1112 goto out;
1113 }
1114 result = c ? Py_True : Py_False;
1115 out:
1116 Py_INCREF(result);
1117 return result;
1118}
1119
1120int
1121_PyString_Eq(PyObject *o1, PyObject *o2)
1122{
1123 PyStringObject *a, *b;
1124 a = (PyStringObject*)o1;
1125 b = (PyStringObject*)o2;
1126 return a->ob_size == b->ob_size
1127 && *a->ob_sval == *b->ob_sval
1128 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001129}
1130
Guido van Rossum9bfef441993-03-29 10:43:31 +00001131static long
Fred Drakeba096332000-07-09 07:04:36 +00001132string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001133{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001134 register int len;
1135 register unsigned char *p;
1136 register long x;
1137
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001138 if (a->ob_shash != -1)
1139 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001140 len = a->ob_size;
1141 p = (unsigned char *) a->ob_sval;
1142 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001143 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001144 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145 x ^= a->ob_size;
1146 if (x == -1)
1147 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001149 return x;
1150}
1151
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001152static PyObject*
1153string_subscript(PyStringObject* self, PyObject* item)
1154{
1155 if (PyInt_Check(item)) {
1156 long i = PyInt_AS_LONG(item);
1157 if (i < 0)
1158 i += PyString_GET_SIZE(self);
1159 return string_item(self,i);
1160 }
1161 else if (PyLong_Check(item)) {
1162 long i = PyLong_AsLong(item);
1163 if (i == -1 && PyErr_Occurred())
1164 return NULL;
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PySlice_Check(item)) {
1170 int start, stop, step, slicelength, cur, i;
1171 char* source_buf;
1172 char* result_buf;
1173 PyObject* result;
1174
1175 if (PySlice_GetIndicesEx((PySliceObject*)item,
1176 PyString_GET_SIZE(self),
1177 &start, &stop, &step, &slicelength) < 0) {
1178 return NULL;
1179 }
1180
1181 if (slicelength <= 0) {
1182 return PyString_FromStringAndSize("", 0);
1183 }
1184 else {
1185 source_buf = PyString_AsString((PyObject*)self);
1186 result_buf = PyMem_Malloc(slicelength);
1187
1188 for (cur = start, i = 0; i < slicelength;
1189 cur += step, i++) {
1190 result_buf[i] = source_buf[cur];
1191 }
1192
1193 result = PyString_FromStringAndSize(result_buf,
1194 slicelength);
1195 PyMem_Free(result_buf);
1196 return result;
1197 }
1198 }
1199 else {
1200 PyErr_SetString(PyExc_TypeError,
1201 "string indices must be integers");
1202 return NULL;
1203 }
1204}
1205
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001206static int
Fred Drakeba096332000-07-09 07:04:36 +00001207string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001208{
1209 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001210 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001211 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212 return -1;
1213 }
1214 *ptr = (void *)self->ob_sval;
1215 return self->ob_size;
1216}
1217
1218static int
Fred Drakeba096332000-07-09 07:04:36 +00001219string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220{
Guido van Rossum045e6881997-09-08 18:30:11 +00001221 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001222 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001223 return -1;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
1229 if ( lenp )
1230 *lenp = self->ob_size;
1231 return 1;
1232}
1233
Guido van Rossum1db70701998-10-08 02:18:52 +00001234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001236{
1237 if ( index != 0 ) {
1238 PyErr_SetString(PyExc_SystemError,
1239 "accessing non-existent string segment");
1240 return -1;
1241 }
1242 *ptr = self->ob_sval;
1243 return self->ob_size;
1244}
1245
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001246static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001247 (inquiry)string_length, /*sq_length*/
1248 (binaryfunc)string_concat, /*sq_concat*/
1249 (intargfunc)string_repeat, /*sq_repeat*/
1250 (intargfunc)string_item, /*sq_item*/
1251 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001252 0, /*sq_ass_item*/
1253 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001254 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001255};
1256
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001257static PyMappingMethods string_as_mapping = {
1258 (inquiry)string_length,
1259 (binaryfunc)string_subscript,
1260 0,
1261};
1262
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263static PyBufferProcs string_as_buffer = {
1264 (getreadbufferproc)string_buffer_getreadbuf,
1265 (getwritebufferproc)string_buffer_getwritebuf,
1266 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001267 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268};
1269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270
1271
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001280
/* Append the substring data[left:right] to the list being built.
 *
 * Relies on the enclosing function providing: a `PyObject *str` scratch
 * variable, the `PyObject *list` under construction, and an `onError`
 * label, which is jumped to if the substring cannot be created or
 * appended.  Wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe under an unbraced if/else; invoke it with a
 * trailing semicolon.
 */
#define SPLIT_APPEND(data, left, right) \
    do { \
        str = PyString_FromStringAndSize((data) + (left), \
                                         (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        Py_DECREF(str); \
    } while (0)
1292
/* Insert the substring data[left:right] at the front of the list being
 * built.
 *
 * Relies on the enclosing function providing: a `PyObject *str` scratch
 * variable, the `PyObject *list` under construction, and an `onError`
 * label, which is jumped to if the substring cannot be created or
 * inserted.  Wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe under an unbraced if/else; invoke it with a
 * trailing semicolon.
 */
#define SPLIT_INSERT(data, left, right) \
    do { \
        str = PyString_FromStringAndSize((data) + (left), \
                                         (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (PyList_Insert(list, 0, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        Py_DECREF(str); \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304
1305static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001306split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001308 int i, j;
1309 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 PyObject *list = PyList_New(0);
1311
1312 if (list == NULL)
1313 return NULL;
1314
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 for (i = j = 0; i < len; ) {
1316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 while (i < len && !isspace(Py_CHARMASK(s[i])))
1320 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 if (maxsplit-- <= 0)
1323 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001325 while (i < len && isspace(Py_CHARMASK(s[i])))
1326 i++;
1327 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328 }
1329 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001331 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001334 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 Py_DECREF(list);
1336 return NULL;
1337}
1338
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001339static PyObject *
1340split_char(const char *s, int len, char ch, int maxcount)
1341{
1342 register int i, j;
1343 PyObject *str;
1344 PyObject *list = PyList_New(0);
1345
1346 if (list == NULL)
1347 return NULL;
1348
1349 for (i = j = 0; i < len; ) {
1350 if (s[i] == ch) {
1351 if (maxcount-- <= 0)
1352 break;
1353 SPLIT_APPEND(s, j, i);
1354 i = j = i + 1;
1355 } else
1356 i++;
1357 }
1358 if (j <= len) {
1359 SPLIT_APPEND(s, j, len);
1360 }
1361 return list;
1362
1363 onError:
1364 Py_DECREF(list);
1365 return NULL;
1366}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001368PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369"S.split([sep [,maxsplit]]) -> list of strings\n\
1370\n\
1371Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001373splits are done. If sep is not specified or is None, any\n\
1374whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375
1376static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001377string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378{
1379 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001380 int maxsplit = -1;
1381 const char *s = PyString_AS_STRING(self), *sub;
1382 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383
Guido van Rossum4c08d552000-03-10 22:55:18 +00001384 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386 if (maxsplit < 0)
1387 maxsplit = INT_MAX;
1388 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 if (PyString_Check(subobj)) {
1391 sub = PyString_AS_STRING(subobj);
1392 n = PyString_GET_SIZE(subobj);
1393 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001394#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001395 else if (PyUnicode_Check(subobj))
1396 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1399 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001400
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 if (n == 0) {
1402 PyErr_SetString(PyExc_ValueError, "empty separator");
1403 return NULL;
1404 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001405 else if (n == 1)
1406 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407
1408 list = PyList_New(0);
1409 if (list == NULL)
1410 return NULL;
1411
1412 i = j = 0;
1413 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001414 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001415 if (maxsplit-- <= 0)
1416 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1418 if (item == NULL)
1419 goto fail;
1420 err = PyList_Append(list, item);
1421 Py_DECREF(item);
1422 if (err < 0)
1423 goto fail;
1424 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 }
1426 else
1427 i++;
1428 }
1429 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1430 if (item == NULL)
1431 goto fail;
1432 err = PyList_Append(list, item);
1433 Py_DECREF(item);
1434 if (err < 0)
1435 goto fail;
1436
1437 return list;
1438
1439 fail:
1440 Py_DECREF(list);
1441 return NULL;
1442}
1443
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001444static PyObject *
1445rsplit_whitespace(const char *s, int len, int maxsplit)
1446{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447 int i, j;
1448 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001449 PyObject *list = PyList_New(0);
1450
1451 if (list == NULL)
1452 return NULL;
1453
1454 for (i = j = len - 1; i >= 0; ) {
1455 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1456 i--;
1457 j = i;
1458 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1459 i--;
1460 if (j > i) {
1461 if (maxsplit-- <= 0)
1462 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001464 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1465 i--;
1466 j = i;
1467 }
1468 }
1469 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001470 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001471 }
1472 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001473 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001474 Py_DECREF(list);
1475 return NULL;
1476}
1477
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001478static PyObject *
1479rsplit_char(const char *s, int len, char ch, int maxcount)
1480{
1481 register int i, j;
1482 PyObject *str;
1483 PyObject *list = PyList_New(0);
1484
1485 if (list == NULL)
1486 return NULL;
1487
1488 for (i = j = len - 1; i >= 0; ) {
1489 if (s[i] == ch) {
1490 if (maxcount-- <= 0)
1491 break;
1492 SPLIT_INSERT(s, i + 1, j + 1);
1493 j = i = i - 1;
1494 } else
1495 i--;
1496 }
1497 if (j >= -1) {
1498 SPLIT_INSERT(s, 0, j + 1);
1499 }
1500 return list;
1501
1502 onError:
1503 Py_DECREF(list);
1504 return NULL;
1505}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001506
1507PyDoc_STRVAR(rsplit__doc__,
1508"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1509\n\
1510Return a list of the words in the string S, using sep as the\n\
1511delimiter string, starting at the end of the string and working\n\
1512to the front. If maxsplit is given, at most maxsplit splits are\n\
1513done. If sep is not specified or is None, any whitespace string\n\
1514is a separator.");
1515
1516static PyObject *
1517string_rsplit(PyStringObject *self, PyObject *args)
1518{
1519 int len = PyString_GET_SIZE(self), n, i, j, err;
1520 int maxsplit = -1;
1521 const char *s = PyString_AS_STRING(self), *sub;
1522 PyObject *list, *item, *subobj = Py_None;
1523
1524 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1525 return NULL;
1526 if (maxsplit < 0)
1527 maxsplit = INT_MAX;
1528 if (subobj == Py_None)
1529 return rsplit_whitespace(s, len, maxsplit);
1530 if (PyString_Check(subobj)) {
1531 sub = PyString_AS_STRING(subobj);
1532 n = PyString_GET_SIZE(subobj);
1533 }
1534#ifdef Py_USING_UNICODE
1535 else if (PyUnicode_Check(subobj))
1536 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1537#endif
1538 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1539 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001540
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001541 if (n == 0) {
1542 PyErr_SetString(PyExc_ValueError, "empty separator");
1543 return NULL;
1544 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001545 else if (n == 1)
1546 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001547
1548 list = PyList_New(0);
1549 if (list == NULL)
1550 return NULL;
1551
1552 j = len;
1553 i = j - n;
1554 while (i >= 0) {
1555 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1556 if (maxsplit-- <= 0)
1557 break;
1558 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1559 if (item == NULL)
1560 goto fail;
1561 err = PyList_Insert(list, 0, item);
1562 Py_DECREF(item);
1563 if (err < 0)
1564 goto fail;
1565 j = i;
1566 i -= n;
1567 }
1568 else
1569 i--;
1570 }
1571 item = PyString_FromStringAndSize(s, j);
1572 if (item == NULL)
1573 goto fail;
1574 err = PyList_Insert(list, 0, item);
1575 Py_DECREF(item);
1576 if (err < 0)
1577 goto fail;
1578
1579 return list;
1580
1581 fail:
1582 Py_DECREF(list);
1583 return NULL;
1584}
1585
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001587PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588"S.join(sequence) -> string\n\
1589\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001590Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592
1593static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001594string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595{
1596 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001597 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 char *p;
1600 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001601 size_t sz = 0;
1602 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001603 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
Tim Peters19fe14e2001-01-19 03:03:47 +00001605 seq = PySequence_Fast(orig, "");
1606 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001607 if (PyErr_ExceptionMatches(PyExc_TypeError))
1608 PyErr_Format(PyExc_TypeError,
1609 "sequence expected, %.80s found",
1610 orig->ob_type->tp_name);
1611 return NULL;
1612 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001613
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001614 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001615 if (seqlen == 0) {
1616 Py_DECREF(seq);
1617 return PyString_FromString("");
1618 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001620 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001621 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1622 PyErr_Format(PyExc_TypeError,
1623 "sequence item 0: expected string,"
1624 " %.80s found",
1625 item->ob_type->tp_name);
1626 Py_DECREF(seq);
1627 return NULL;
1628 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001629 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001630 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001633
Tim Peters19fe14e2001-01-19 03:03:47 +00001634 /* There are at least two things to join. Do a pre-pass to figure out
1635 * the total amount of space we'll need (sz), see whether any argument
1636 * is absurd, and defer to the Unicode join if appropriate.
1637 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001640 item = PySequence_Fast_GET_ITEM(seq, i);
1641 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001642#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001643 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001644 /* Defer to Unicode join.
1645 * CAUTION: There's no gurantee that the
1646 * original sequence can be iterated over
1647 * again, so we must pass seq here.
1648 */
1649 PyObject *result;
1650 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001651 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001652 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001653 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001654#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001656 "sequence item %i: expected string,"
1657 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001658 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001659 Py_DECREF(seq);
1660 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001661 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 sz += PyString_GET_SIZE(item);
1663 if (i != 0)
1664 sz += seplen;
1665 if (sz < old_sz || sz > INT_MAX) {
1666 PyErr_SetString(PyExc_OverflowError,
1667 "join() is too long for a Python string");
1668 Py_DECREF(seq);
1669 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001671 }
1672
1673 /* Allocate result space. */
1674 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1675 if (res == NULL) {
1676 Py_DECREF(seq);
1677 return NULL;
1678 }
1679
1680 /* Catenate everything. */
1681 p = PyString_AS_STRING(res);
1682 for (i = 0; i < seqlen; ++i) {
1683 size_t n;
1684 item = PySequence_Fast_GET_ITEM(seq, i);
1685 n = PyString_GET_SIZE(item);
1686 memcpy(p, PyString_AS_STRING(item), n);
1687 p += n;
1688 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001689 memcpy(p, sep, seplen);
1690 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001691 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001693
Jeremy Hylton49048292000-07-11 03:28:17 +00001694 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696}
1697
Tim Peters52e155e2001-06-16 05:42:57 +00001698PyObject *
1699_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001700{
Tim Petersa7259592001-06-16 05:11:17 +00001701 assert(sep != NULL && PyString_Check(sep));
1702 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001703 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001704}
1705
/* Normalise a (start, end) pair against a string of length len.
 *
 * Negative values are taken relative to the end of the string and then
 * clamped at 0.  An end value past len is clamped to len; a start value
 * past len is left untouched.  Both values are updated in place.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int lo = *start;
    int hi = *end;

    if (hi > len) {
        hi = len;
    }
    else if (hi < 0) {
        hi += len;
    }
    if (hi < 0) {
        hi = 0;
    }

    if (lo < 0) {
        lo += len;
        if (lo < 0) {
            lo = 0;
        }
    }

    *start = lo;
    *end = hi;
}
1720
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721static long
Fred Drakeba096332000-07-09 07:04:36 +00001722string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001724 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725 int len = PyString_GET_SIZE(self);
1726 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001727 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001729 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001730 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731 return -2;
1732 if (PyString_Check(subobj)) {
1733 sub = PyString_AS_STRING(subobj);
1734 n = PyString_GET_SIZE(subobj);
1735 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001736#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001738 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001739#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001740 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 return -2;
1742
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001743 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 if (dir > 0) {
1746 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 last -= n;
1749 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001750 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001751 return (long)i;
1752 }
1753 else {
1754 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001755
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 if (n == 0 && i <= last)
1757 return (long)last;
1758 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001759 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 return (long)j;
1761 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001762
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 return -1;
1764}
1765
1766
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001767PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768"S.find(sub [,start [,end]]) -> int\n\
1769\n\
1770Return the lowest index in S where substring sub is found,\n\
1771such that sub is contained within s[start,end]. Optional\n\
1772arguments start and end are interpreted as in slice notation.\n\
1773\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001774Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775
1776static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001777string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001779 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780 if (result == -2)
1781 return NULL;
1782 return PyInt_FromLong(result);
1783}
1784
1785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787"S.index(sub [,start [,end]]) -> int\n\
1788\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001789Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790
1791static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001792string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001794 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 if (result == -2)
1796 return NULL;
1797 if (result == -1) {
1798 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001799 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800 return NULL;
1801 }
1802 return PyInt_FromLong(result);
1803}
1804
1805
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001806PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807"S.rfind(sub [,start [,end]]) -> int\n\
1808\n\
1809Return the highest index in S where substring sub is found,\n\
1810such that sub is contained within s[start,end]. Optional\n\
1811arguments start and end are interpreted as in slice notation.\n\
1812\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001813Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814
1815static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001816string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001818 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 if (result == -2)
1820 return NULL;
1821 return PyInt_FromLong(result);
1822}
1823
1824
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001825PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826"S.rindex(sub [,start [,end]]) -> int\n\
1827\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001828Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829
1830static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001831string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001833 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834 if (result == -2)
1835 return NULL;
1836 if (result == -1) {
1837 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001838 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 return NULL;
1840 }
1841 return PyInt_FromLong(result);
1842}
1843
1844
1845static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001846do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1847{
1848 char *s = PyString_AS_STRING(self);
1849 int len = PyString_GET_SIZE(self);
1850 char *sep = PyString_AS_STRING(sepobj);
1851 int seplen = PyString_GET_SIZE(sepobj);
1852 int i, j;
1853
1854 i = 0;
1855 if (striptype != RIGHTSTRIP) {
1856 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1857 i++;
1858 }
1859 }
1860
1861 j = len;
1862 if (striptype != LEFTSTRIP) {
1863 do {
1864 j--;
1865 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1866 j++;
1867 }
1868
1869 if (i == 0 && j == len && PyString_CheckExact(self)) {
1870 Py_INCREF(self);
1871 return (PyObject*)self;
1872 }
1873 else
1874 return PyString_FromStringAndSize(s+i, j-i);
1875}
1876
1877
1878static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001879do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880{
1881 char *s = PyString_AS_STRING(self);
1882 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884 i = 0;
1885 if (striptype != RIGHTSTRIP) {
1886 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1887 i++;
1888 }
1889 }
1890
1891 j = len;
1892 if (striptype != LEFTSTRIP) {
1893 do {
1894 j--;
1895 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1896 j++;
1897 }
1898
Tim Peters8fa5dd02001-09-12 02:18:30 +00001899 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 Py_INCREF(self);
1901 return (PyObject*)self;
1902 }
1903 else
1904 return PyString_FromStringAndSize(s+i, j-i);
1905}
1906
1907
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001908static PyObject *
1909do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1910{
1911 PyObject *sep = NULL;
1912
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001913 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001914 return NULL;
1915
1916 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001917 if (PyString_Check(sep))
1918 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001919#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001920 else if (PyUnicode_Check(sep)) {
1921 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1922 PyObject *res;
1923 if (uniself==NULL)
1924 return NULL;
1925 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1926 striptype, sep);
1927 Py_DECREF(uniself);
1928 return res;
1929 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001930#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001931 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001932 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001933#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001934 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001935#else
1936 "%s arg must be None or str",
1937#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001938 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001939 return NULL;
1940 }
1941 return do_xstrip(self, striptype, sep);
1942 }
1943
1944 return do_strip(self, striptype);
1945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001949"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950\n\
1951Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001952whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001953If chars is given and not None, remove characters in chars instead.\n\
1954If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955
1956static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001957string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001959 if (PyTuple_GET_SIZE(args) == 0)
1960 return do_strip(self, BOTHSTRIP); /* Common case */
1961 else
1962 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963}
1964
1965
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001966PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001967"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001970If chars is given and not None, remove characters in chars instead.\n\
1971If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972
1973static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001974string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976 if (PyTuple_GET_SIZE(args) == 0)
1977 return do_strip(self, LEFTSTRIP); /* Common case */
1978 else
1979 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980}
1981
1982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001983PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001984"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001987If chars is given and not None, remove characters in chars instead.\n\
1988If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989
1990static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001993 if (PyTuple_GET_SIZE(args) == 0)
1994 return do_strip(self, RIGHTSTRIP); /* Common case */
1995 else
1996 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997}
1998
1999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002000PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001"S.lower() -> string\n\
2002\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
2005static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002006string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007{
2008 char *s = PyString_AS_STRING(self), *s_new;
2009 int i, n = PyString_GET_SIZE(self);
2010 PyObject *new;
2011
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 new = PyString_FromStringAndSize(NULL, n);
2013 if (new == NULL)
2014 return NULL;
2015 s_new = PyString_AsString(new);
2016 for (i = 0; i < n; i++) {
2017 int c = Py_CHARMASK(*s++);
2018 if (isupper(c)) {
2019 *s_new = tolower(c);
2020 } else
2021 *s_new = c;
2022 s_new++;
2023 }
2024 return new;
2025}
2026
2027
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002028PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029"S.upper() -> string\n\
2030\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002031Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032
2033static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002034string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035{
2036 char *s = PyString_AS_STRING(self), *s_new;
2037 int i, n = PyString_GET_SIZE(self);
2038 PyObject *new;
2039
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 new = PyString_FromStringAndSize(NULL, n);
2041 if (new == NULL)
2042 return NULL;
2043 s_new = PyString_AsString(new);
2044 for (i = 0; i < n; i++) {
2045 int c = Py_CHARMASK(*s++);
2046 if (islower(c)) {
2047 *s_new = toupper(c);
2048 } else
2049 *s_new = c;
2050 s_new++;
2051 }
2052 return new;
2053}
2054
2055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057"S.title() -> string\n\
2058\n\
2059Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061
2062static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002063string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002064{
2065 char *s = PyString_AS_STRING(self), *s_new;
2066 int i, n = PyString_GET_SIZE(self);
2067 int previous_is_cased = 0;
2068 PyObject *new;
2069
Guido van Rossum4c08d552000-03-10 22:55:18 +00002070 new = PyString_FromStringAndSize(NULL, n);
2071 if (new == NULL)
2072 return NULL;
2073 s_new = PyString_AsString(new);
2074 for (i = 0; i < n; i++) {
2075 int c = Py_CHARMASK(*s++);
2076 if (islower(c)) {
2077 if (!previous_is_cased)
2078 c = toupper(c);
2079 previous_is_cased = 1;
2080 } else if (isupper(c)) {
2081 if (previous_is_cased)
2082 c = tolower(c);
2083 previous_is_cased = 1;
2084 } else
2085 previous_is_cased = 0;
2086 *s_new++ = c;
2087 }
2088 return new;
2089}
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092"S.capitalize() -> string\n\
2093\n\
2094Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002095capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
2097static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002098string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099{
2100 char *s = PyString_AS_STRING(self), *s_new;
2101 int i, n = PyString_GET_SIZE(self);
2102 PyObject *new;
2103
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104 new = PyString_FromStringAndSize(NULL, n);
2105 if (new == NULL)
2106 return NULL;
2107 s_new = PyString_AsString(new);
2108 if (0 < n) {
2109 int c = Py_CHARMASK(*s++);
2110 if (islower(c))
2111 *s_new = toupper(c);
2112 else
2113 *s_new = c;
2114 s_new++;
2115 }
2116 for (i = 1; i < n; i++) {
2117 int c = Py_CHARMASK(*s++);
2118 if (isupper(c))
2119 *s_new = tolower(c);
2120 else
2121 *s_new = c;
2122 s_new++;
2123 }
2124 return new;
2125}
2126
2127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129"S.count(sub[, start[, end]]) -> int\n\
2130\n\
2131Return the number of occurrences of substring sub in string\n\
2132S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002133interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134
2135static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002136string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 int len = PyString_GET_SIZE(self), n;
2140 int i = 0, last = INT_MAX;
2141 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143
Guido van Rossumc6821402000-05-08 14:08:05 +00002144 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2145 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002147
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148 if (PyString_Check(subobj)) {
2149 sub = PyString_AS_STRING(subobj);
2150 n = PyString_GET_SIZE(subobj);
2151 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002152#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002153 else if (PyUnicode_Check(subobj)) {
2154 int count;
2155 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2156 if (count == -1)
2157 return NULL;
2158 else
2159 return PyInt_FromLong((long) count);
2160 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002161#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002162 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2163 return NULL;
2164
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002165 string_adjust_indices(&i, &last, len);
2166
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 m = last + 1 - n;
2168 if (n == 0)
2169 return PyInt_FromLong((long) (m-i));
2170
2171 r = 0;
2172 while (i < m) {
2173 if (!memcmp(s+i, sub, n)) {
2174 r++;
2175 i += n;
2176 } else {
2177 i++;
2178 }
2179 }
2180 return PyInt_FromLong((long) r);
2181}
2182
2183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002184PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185"S.swapcase() -> string\n\
2186\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002188converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189
2190static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002191string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192{
2193 char *s = PyString_AS_STRING(self), *s_new;
2194 int i, n = PyString_GET_SIZE(self);
2195 PyObject *new;
2196
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 new = PyString_FromStringAndSize(NULL, n);
2198 if (new == NULL)
2199 return NULL;
2200 s_new = PyString_AsString(new);
2201 for (i = 0; i < n; i++) {
2202 int c = Py_CHARMASK(*s++);
2203 if (islower(c)) {
2204 *s_new = toupper(c);
2205 }
2206 else if (isupper(c)) {
2207 *s_new = tolower(c);
2208 }
2209 else
2210 *s_new = c;
2211 s_new++;
2212 }
2213 return new;
2214}
2215
2216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218"S.translate(table [,deletechars]) -> string\n\
2219\n\
2220Return a copy of the string S, where all characters occurring\n\
2221in the optional argument deletechars are removed, and the\n\
2222remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002223translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224
2225static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002226string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 register char *input, *output;
2229 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 register int i, c, changed = 0;
2231 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 int inlen, tablen, dellen = 0;
2234 PyObject *result;
2235 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002238 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241
2242 if (PyString_Check(tableobj)) {
2243 table1 = PyString_AS_STRING(tableobj);
2244 tablen = PyString_GET_SIZE(tableobj);
2245 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002246#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002248 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 parameter; instead a mapping to None will cause characters
2250 to be deleted. */
2251 if (delobj != NULL) {
2252 PyErr_SetString(PyExc_TypeError,
2253 "deletions are implemented differently for unicode");
2254 return NULL;
2255 }
2256 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2257 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002258#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261
Martin v. Löwis00b61272002-12-12 20:03:19 +00002262 if (tablen != 256) {
2263 PyErr_SetString(PyExc_ValueError,
2264 "translation table must be 256 characters long");
2265 return NULL;
2266 }
2267
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268 if (delobj != NULL) {
2269 if (PyString_Check(delobj)) {
2270 del_table = PyString_AS_STRING(delobj);
2271 dellen = PyString_GET_SIZE(delobj);
2272 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002273#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 else if (PyUnicode_Check(delobj)) {
2275 PyErr_SetString(PyExc_TypeError,
2276 "deletions are implemented differently for unicode");
2277 return NULL;
2278 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002279#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002280 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2281 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282 }
2283 else {
2284 del_table = NULL;
2285 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286 }
2287
2288 table = table1;
2289 inlen = PyString_Size(input_obj);
2290 result = PyString_FromStringAndSize((char *)NULL, inlen);
2291 if (result == NULL)
2292 return NULL;
2293 output_start = output = PyString_AsString(result);
2294 input = PyString_AsString(input_obj);
2295
2296 if (dellen == 0) {
2297 /* If no deletions are required, use faster code */
2298 for (i = inlen; --i >= 0; ) {
2299 c = Py_CHARMASK(*input++);
2300 if (Py_CHARMASK((*output++ = table[c])) != c)
2301 changed = 1;
2302 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002303 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304 return result;
2305 Py_DECREF(result);
2306 Py_INCREF(input_obj);
2307 return input_obj;
2308 }
2309
2310 for (i = 0; i < 256; i++)
2311 trans_table[i] = Py_CHARMASK(table[i]);
2312
2313 for (i = 0; i < dellen; i++)
2314 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2315
2316 for (i = inlen; --i >= 0; ) {
2317 c = Py_CHARMASK(*input++);
2318 if (trans_table[c] != -1)
2319 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2320 continue;
2321 changed = 1;
2322 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002323 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324 Py_DECREF(result);
2325 Py_INCREF(input_obj);
2326 return input_obj;
2327 }
2328 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002329 if (inlen > 0)
2330 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331 return result;
2332}
2333
2334
2335/* What follows is used for implementing replace(). Perry Stoll. */
2336
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns the index of that occurrence in MEM, or -1
  if there is none.  A pattern longer than MEM is never found, so -1
  is returned in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* pattern can not start in the last pat_len-1 chars */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] &&
            memcmp(mem + pos, pat, pat_len) == 0) {
            return pos;
        }
        pos++;
    }
    return -1;
}
2362
/*
  mymemcnt

  Return the number of non-overlapping times PAT is found in MEM,
  scanning left to right and resuming just past each match.
  E.g. mem=1111 and pat==11 returns 2;
       mem=11111 and pat==11 also returns 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int where;

    while (len >= 0 &&
           (where = mymemfind(mem, len, pat, pat_len)) != -1) {
        /* Continue right after the match just found. */
        const int consumed = where + pat_len;

        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
2386
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR or there are no
   occurrences of PAT in STR, then the original string is returned.
   Otherwise, a new string is allocated here and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
2406static char *
Tim Petersc2e7da92000-07-09 08:02:21 +00002407mymemreplace(const char *str, int len, /* input string */
2408 const char *pat, int pat_len, /* pattern string to find */
2409 const char *sub, int sub_len, /* substitution string */
2410 int count, /* number of replacements */
Tim Peters4cd44ef2001-05-10 00:05:33 +00002411 int *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412{
2413 char *out_s;
2414 char *new_s;
2415 int nfound, offset, new_len;
2416
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002417 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 goto return_same;
2419
2420 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002421 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002422 if (count < 0)
2423 count = INT_MAX;
2424 else if (nfound > count)
2425 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 if (nfound == 0)
2427 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002428
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002430 if (new_len == 0) {
2431 /* Have to allocate something for the caller to free(). */
2432 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002433 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002434 return NULL;
2435 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002437 else {
2438 assert(new_len > 0);
2439 new_s = (char *)PyMem_MALLOC(new_len);
2440 if (new_s == NULL)
2441 return NULL;
2442 out_s = new_s;
2443
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002444 if (pat_len > 0) {
2445 for (; nfound > 0; --nfound) {
2446 /* find index of next instance of pattern */
2447 offset = mymemfind(str, len, pat, pat_len);
2448 if (offset == -1)
2449 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002450
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002451 /* copy non matching part of input string */
2452 memcpy(new_s, str, offset);
2453 str += offset + pat_len;
2454 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002455
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002456 /* copy substitute into the output string */
2457 new_s += offset;
2458 memcpy(new_s, sub, sub_len);
2459 new_s += sub_len;
2460 }
2461 /* copy any remaining values into output string */
2462 if (len > 0)
2463 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002464 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002465 else {
2466 for (;;++str, --len) {
2467 memcpy(new_s, sub, sub_len);
2468 new_s += sub_len;
2469 if (--nfound <= 0) {
2470 memcpy(new_s, str, len);
2471 break;
2472 }
2473 *new_s++ = *str;
2474 }
2475 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002476 }
2477 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478 return out_s;
2479
2480 return_same:
2481 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002482 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483}
2484
2485
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002486PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002487"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488\n\
2489Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002490old replaced by new. If the optional argument count is\n\
2491given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492
2493static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002494string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002495{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496 const char *str = PyString_AS_STRING(self), *sub, *repl;
2497 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002498 const int len = PyString_GET_SIZE(self);
2499 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002500 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002502 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002503
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504 if (!PyArg_ParseTuple(args, "OO|i:replace",
2505 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002507
2508 if (PyString_Check(subobj)) {
2509 sub = PyString_AS_STRING(subobj);
2510 sub_len = PyString_GET_SIZE(subobj);
2511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002512#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002514 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002516#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2518 return NULL;
2519
2520 if (PyString_Check(replobj)) {
2521 repl = PyString_AS_STRING(replobj);
2522 repl_len = PyString_GET_SIZE(replobj);
2523 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002524#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002526 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002528#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2530 return NULL;
2531
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002533 if (new_s == NULL) {
2534 PyErr_NoMemory();
2535 return NULL;
2536 }
2537 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002538 if (PyString_CheckExact(self)) {
2539 /* we're returning another reference to self */
2540 new = (PyObject*)self;
2541 Py_INCREF(new);
2542 }
2543 else {
2544 new = PyString_FromStringAndSize(str, len);
2545 if (new == NULL)
2546 return NULL;
2547 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002548 }
2549 else {
2550 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002551 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552 }
2553 return new;
2554}
2555
2556
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002557PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002558"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002559\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002560Return True if S starts with the specified prefix, False otherwise.\n\
2561With optional start, test S beginning at that position.\n\
2562With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563
2564static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002565string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002568 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570 int plen;
2571 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002572 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002574
Guido van Rossumc6821402000-05-08 14:08:05 +00002575 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2576 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002577 return NULL;
2578 if (PyString_Check(subobj)) {
2579 prefix = PyString_AS_STRING(subobj);
2580 plen = PyString_GET_SIZE(subobj);
2581 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002582#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002583 else if (PyUnicode_Check(subobj)) {
2584 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002585 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002586 subobj, start, end, -1);
2587 if (rc == -1)
2588 return NULL;
2589 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002590 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002591 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002592#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002594 return NULL;
2595
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002596 string_adjust_indices(&start, &end, len);
2597
2598 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002599 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002600
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002601 if (end-start >= plen)
2602 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2603 else
2604 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605}
2606
2607
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002608PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002609"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002611Return True if S ends with the specified suffix, False otherwise.\n\
2612With optional start, test S beginning at that position.\n\
2613With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614
2615static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002616string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002618 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 const char* suffix;
2621 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002622 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002623 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002625
Guido van Rossumc6821402000-05-08 14:08:05 +00002626 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2627 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 return NULL;
2629 if (PyString_Check(subobj)) {
2630 suffix = PyString_AS_STRING(subobj);
2631 slen = PyString_GET_SIZE(subobj);
2632 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002633#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002634 else if (PyUnicode_Check(subobj)) {
2635 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002636 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002637 subobj, start, end, +1);
2638 if (rc == -1)
2639 return NULL;
2640 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002641 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002642 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002643#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002644 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002645 return NULL;
2646
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002647 string_adjust_indices(&start, &end, len);
2648
2649 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002650 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002652 if (end-slen > start)
2653 start = end - slen;
2654 if (end-start >= slen)
2655 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2656 else
2657 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658}
2659
2660
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002661PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002662"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002663\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002664Encodes S using the codec registered for encoding. encoding defaults\n\
2665to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002666handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002667a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2668'xmlcharrefreplace' as well as any other name registered with\n\
2669codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002670
2671static PyObject *
2672string_encode(PyStringObject *self, PyObject *args)
2673{
2674 char *encoding = NULL;
2675 char *errors = NULL;
2676 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2677 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002678 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2679}
2680
2681
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002682PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002683"S.decode([encoding[,errors]]) -> object\n\
2684\n\
2685Decodes S using the codec registered for encoding. encoding defaults\n\
2686to the default encoding. errors may be given to set a different error\n\
2687handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002688a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2689as well as any other name registerd with codecs.register_error that is\n\
2690able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002691
2692static PyObject *
2693string_decode(PyStringObject *self, PyObject *args)
2694{
2695 char *encoding = NULL;
2696 char *errors = NULL;
2697 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2698 return NULL;
2699 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002700}
2701
2702
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002703PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002704"S.expandtabs([tabsize]) -> string\n\
2705\n\
2706Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002707If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002708
2709static PyObject*
2710string_expandtabs(PyStringObject *self, PyObject *args)
2711{
2712 const char *e, *p;
2713 char *q;
2714 int i, j;
2715 PyObject *u;
2716 int tabsize = 8;
2717
2718 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2719 return NULL;
2720
Thomas Wouters7e474022000-07-16 12:04:32 +00002721 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002722 i = j = 0;
2723 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2724 for (p = PyString_AS_STRING(self); p < e; p++)
2725 if (*p == '\t') {
2726 if (tabsize > 0)
2727 j += tabsize - (j % tabsize);
2728 }
2729 else {
2730 j++;
2731 if (*p == '\n' || *p == '\r') {
2732 i += j;
2733 j = 0;
2734 }
2735 }
2736
2737 /* Second pass: create output string and fill it */
2738 u = PyString_FromStringAndSize(NULL, i + j);
2739 if (!u)
2740 return NULL;
2741
2742 j = 0;
2743 q = PyString_AS_STRING(u);
2744
2745 for (p = PyString_AS_STRING(self); p < e; p++)
2746 if (*p == '\t') {
2747 if (tabsize > 0) {
2748 i = tabsize - (j % tabsize);
2749 j += i;
2750 while (i--)
2751 *q++ = ' ';
2752 }
2753 }
2754 else {
2755 j++;
2756 *q++ = *p;
2757 if (*p == '\n' || *p == '\r')
2758 j = 0;
2759 }
2760
2761 return u;
2762}
2763
Tim Peters8fa5dd02001-09-12 02:18:30 +00002764static PyObject *
2765pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002766{
2767 PyObject *u;
2768
2769 if (left < 0)
2770 left = 0;
2771 if (right < 0)
2772 right = 0;
2773
Tim Peters8fa5dd02001-09-12 02:18:30 +00002774 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002775 Py_INCREF(self);
2776 return (PyObject *)self;
2777 }
2778
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002779 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002780 left + PyString_GET_SIZE(self) + right);
2781 if (u) {
2782 if (left)
2783 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002784 memcpy(PyString_AS_STRING(u) + left,
2785 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002786 PyString_GET_SIZE(self));
2787 if (right)
2788 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2789 fill, right);
2790 }
2791
2792 return u;
2793}
2794
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002795PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002796"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002797"\n"
2798"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002799"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002800
2801static PyObject *
2802string_ljust(PyStringObject *self, PyObject *args)
2803{
2804 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002805 char fillchar = ' ';
2806
2807 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002808 return NULL;
2809
Tim Peters8fa5dd02001-09-12 02:18:30 +00002810 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002811 Py_INCREF(self);
2812 return (PyObject*) self;
2813 }
2814
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002815 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002816}
2817
2818
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002819PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002820"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002821"\n"
2822"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002823"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002824
2825static PyObject *
2826string_rjust(PyStringObject *self, PyObject *args)
2827{
2828 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002829 char fillchar = ' ';
2830
2831 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002832 return NULL;
2833
Tim Peters8fa5dd02001-09-12 02:18:30 +00002834 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002835 Py_INCREF(self);
2836 return (PyObject*) self;
2837 }
2838
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002839 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840}
2841
2842
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002843PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002844"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002845"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002846"Return S centered in a string of length width. Padding is\n"
2847"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002848
2849static PyObject *
2850string_center(PyStringObject *self, PyObject *args)
2851{
2852 int marg, left;
2853 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002854 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002855
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002856 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002857 return NULL;
2858
Tim Peters8fa5dd02001-09-12 02:18:30 +00002859 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002860 Py_INCREF(self);
2861 return (PyObject*) self;
2862 }
2863
2864 marg = width - PyString_GET_SIZE(self);
2865 left = marg / 2 + (marg & width & 1);
2866
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002867 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002868}
2869
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002870PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002871"S.zfill(width) -> string\n"
2872"\n"
2873"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002874"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002875
2876static PyObject *
2877string_zfill(PyStringObject *self, PyObject *args)
2878{
2879 int fill;
2880 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002881 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002882
2883 int width;
2884 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2885 return NULL;
2886
2887 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002888 if (PyString_CheckExact(self)) {
2889 Py_INCREF(self);
2890 return (PyObject*) self;
2891 }
2892 else
2893 return PyString_FromStringAndSize(
2894 PyString_AS_STRING(self),
2895 PyString_GET_SIZE(self)
2896 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002897 }
2898
2899 fill = width - PyString_GET_SIZE(self);
2900
2901 s = pad(self, fill, 0, '0');
2902
2903 if (s == NULL)
2904 return NULL;
2905
2906 p = PyString_AS_STRING(s);
2907 if (p[fill] == '+' || p[fill] == '-') {
2908 /* move sign to beginning of string */
2909 p[0] = p[fill];
2910 p[fill] = '0';
2911 }
2912
2913 return (PyObject*) s;
2914}
2915
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002916PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002917"S.isspace() -> bool\n\
2918\n\
2919Return True if all characters in S are whitespace\n\
2920and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002921
2922static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002923string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002924{
Fred Drakeba096332000-07-09 07:04:36 +00002925 register const unsigned char *p
2926 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002927 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002928
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929 /* Shortcut for single character strings */
2930 if (PyString_GET_SIZE(self) == 1 &&
2931 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002932 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002933
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002934 /* Special case for empty strings */
2935 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002936 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002937
Guido van Rossum4c08d552000-03-10 22:55:18 +00002938 e = p + PyString_GET_SIZE(self);
2939 for (; p < e; p++) {
2940 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002941 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002943 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002944}
2945
2946
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002947PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002948"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002949\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002950Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002951and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002952
2953static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002954string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002955{
Fred Drakeba096332000-07-09 07:04:36 +00002956 register const unsigned char *p
2957 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002958 register const unsigned char *e;
2959
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002960 /* Shortcut for single character strings */
2961 if (PyString_GET_SIZE(self) == 1 &&
2962 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002963 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002964
2965 /* Special case for empty strings */
2966 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002967 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002968
2969 e = p + PyString_GET_SIZE(self);
2970 for (; p < e; p++) {
2971 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002972 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002973 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002974 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002975}
2976
2977
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002978PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002979"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002980\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002981Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002982and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002983
2984static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002985string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002986{
Fred Drakeba096332000-07-09 07:04:36 +00002987 register const unsigned char *p
2988 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002989 register const unsigned char *e;
2990
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002991 /* Shortcut for single character strings */
2992 if (PyString_GET_SIZE(self) == 1 &&
2993 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002994 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002995
2996 /* Special case for empty strings */
2997 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002998 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002999
3000 e = p + PyString_GET_SIZE(self);
3001 for (; p < e; p++) {
3002 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003003 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003004 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003005 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003006}
3007
3008
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003009PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003010"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003011\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003012Return True if all characters in S are digits\n\
3013and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003014
3015static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003016string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003017{
Fred Drakeba096332000-07-09 07:04:36 +00003018 register const unsigned char *p
3019 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003020 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003021
Guido van Rossum4c08d552000-03-10 22:55:18 +00003022 /* Shortcut for single character strings */
3023 if (PyString_GET_SIZE(self) == 1 &&
3024 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003025 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003026
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003027 /* Special case for empty strings */
3028 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003029 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003030
Guido van Rossum4c08d552000-03-10 22:55:18 +00003031 e = p + PyString_GET_SIZE(self);
3032 for (; p < e; p++) {
3033 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003034 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003035 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003036 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003037}
3038
3039
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003040PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003041"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003042\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003043Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003044at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003045
3046static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003047string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003048{
Fred Drakeba096332000-07-09 07:04:36 +00003049 register const unsigned char *p
3050 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003051 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003052 int cased;
3053
Guido van Rossum4c08d552000-03-10 22:55:18 +00003054 /* Shortcut for single character strings */
3055 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003056 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003057
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003058 /* Special case for empty strings */
3059 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003060 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003061
Guido van Rossum4c08d552000-03-10 22:55:18 +00003062 e = p + PyString_GET_SIZE(self);
3063 cased = 0;
3064 for (; p < e; p++) {
3065 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003066 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 else if (!cased && islower(*p))
3068 cased = 1;
3069 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003070 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071}
3072
3073
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003074PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003075"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003077Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003078at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003079
3080static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003081string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082{
Fred Drakeba096332000-07-09 07:04:36 +00003083 register const unsigned char *p
3084 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003085 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086 int cased;
3087
Guido van Rossum4c08d552000-03-10 22:55:18 +00003088 /* Shortcut for single character strings */
3089 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003090 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003091
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003092 /* Special case for empty strings */
3093 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003094 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003095
Guido van Rossum4c08d552000-03-10 22:55:18 +00003096 e = p + PyString_GET_SIZE(self);
3097 cased = 0;
3098 for (; p < e; p++) {
3099 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003100 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101 else if (!cased && isupper(*p))
3102 cased = 1;
3103 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003104 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003105}
3106
3107
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003108PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003109"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003111Return True if S is a titlecased string and there is at least one\n\
3112character in S, i.e. uppercase characters may only follow uncased\n\
3113characters and lowercase characters only cased ones. Return False\n\
3114otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115
3116static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003117string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118{
Fred Drakeba096332000-07-09 07:04:36 +00003119 register const unsigned char *p
3120 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003121 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003122 int cased, previous_is_cased;
3123
Guido van Rossum4c08d552000-03-10 22:55:18 +00003124 /* Shortcut for single character strings */
3125 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003126 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003127
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003128 /* Special case for empty strings */
3129 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003130 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003131
Guido van Rossum4c08d552000-03-10 22:55:18 +00003132 e = p + PyString_GET_SIZE(self);
3133 cased = 0;
3134 previous_is_cased = 0;
3135 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003136 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003137
3138 if (isupper(ch)) {
3139 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003140 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141 previous_is_cased = 1;
3142 cased = 1;
3143 }
3144 else if (islower(ch)) {
3145 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003146 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147 previous_is_cased = 1;
3148 cased = 1;
3149 }
3150 else
3151 previous_is_cased = 0;
3152 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003153 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154}
3155
3156
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003157PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003158"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159\n\
3160Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003161Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003162is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163
Guido van Rossum4c08d552000-03-10 22:55:18 +00003164static PyObject*
3165string_splitlines(PyStringObject *self, PyObject *args)
3166{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 register int i;
3168 register int j;
3169 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003170 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003171 PyObject *list;
3172 PyObject *str;
3173 char *data;
3174
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003175 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176 return NULL;
3177
3178 data = PyString_AS_STRING(self);
3179 len = PyString_GET_SIZE(self);
3180
Guido van Rossum4c08d552000-03-10 22:55:18 +00003181 list = PyList_New(0);
3182 if (!list)
3183 goto onError;
3184
3185 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003186 int eol;
3187
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188 /* Find a line and append it */
3189 while (i < len && data[i] != '\n' && data[i] != '\r')
3190 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191
3192 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003193 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003194 if (i < len) {
3195 if (data[i] == '\r' && i + 1 < len &&
3196 data[i+1] == '\n')
3197 i += 2;
3198 else
3199 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003200 if (keepends)
3201 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003202 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003203 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003204 j = i;
3205 }
3206 if (j < len) {
3207 SPLIT_APPEND(data, j, len);
3208 }
3209
3210 return list;
3211
3212 onError:
3213 Py_DECREF(list);
3214 return NULL;
3215}
3216
3217#undef SPLIT_APPEND
3218
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003219static PyObject *
3220string_getnewargs(PyStringObject *v)
3221{
3222 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3223}
3224
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003225
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003226static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003227string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003228 /* Counterparts of the obsolete stropmodule functions; except
3229 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003230 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3231 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003232 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003233 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3234 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003235 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3236 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3237 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3238 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3239 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3240 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3241 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003242 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3243 capitalize__doc__},
3244 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3245 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3246 endswith__doc__},
3247 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3248 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3249 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3250 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3251 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3252 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3253 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3254 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3255 startswith__doc__},
3256 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3257 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3258 swapcase__doc__},
3259 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3260 translate__doc__},
3261 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3262 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3263 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3264 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3265 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3266 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3267 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3268 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3269 expandtabs__doc__},
3270 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3271 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003272 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003273 {NULL, NULL} /* sentinel */
3274};
3275
Jeremy Hylton938ace62002-07-17 16:30:39 +00003276static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003277str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3278
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003279static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003280string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003281{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003282 PyObject *x = NULL;
3283 static char *kwlist[] = {"object", 0};
3284
Guido van Rossumae960af2001-08-30 03:11:59 +00003285 if (type != &PyString_Type)
3286 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003287 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3288 return NULL;
3289 if (x == NULL)
3290 return PyString_FromString("");
3291 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003292}
3293
Guido van Rossumae960af2001-08-30 03:11:59 +00003294static PyObject *
3295str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3296{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003297 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003298 int n;
3299
3300 assert(PyType_IsSubtype(type, &PyString_Type));
3301 tmp = string_new(&PyString_Type, args, kwds);
3302 if (tmp == NULL)
3303 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003304 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003305 n = PyString_GET_SIZE(tmp);
3306 pnew = type->tp_alloc(type, n);
3307 if (pnew != NULL) {
3308 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003309 ((PyStringObject *)pnew)->ob_shash =
3310 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003311 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003312 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003313 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003314 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003315}
3316
Guido van Rossumcacfc072002-05-24 19:01:59 +00003317static PyObject *
3318basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3319{
3320 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003321 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003322 return NULL;
3323}
3324
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003325static PyObject *
3326string_mod(PyObject *v, PyObject *w)
3327{
3328 if (!PyString_Check(v)) {
3329 Py_INCREF(Py_NotImplemented);
3330 return Py_NotImplemented;
3331 }
3332 return PyString_Format(v, w);
3333}
3334
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003335PyDoc_STRVAR(basestring_doc,
3336"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003337
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003338static PyNumberMethods string_as_number = {
3339 0, /*nb_add*/
3340 0, /*nb_subtract*/
3341 0, /*nb_multiply*/
3342 0, /*nb_divide*/
3343 string_mod, /*nb_remainder*/
3344};
3345
3346
Guido van Rossumcacfc072002-05-24 19:01:59 +00003347PyTypeObject PyBaseString_Type = {
3348 PyObject_HEAD_INIT(&PyType_Type)
3349 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003350 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003351 0,
3352 0,
3353 0, /* tp_dealloc */
3354 0, /* tp_print */
3355 0, /* tp_getattr */
3356 0, /* tp_setattr */
3357 0, /* tp_compare */
3358 0, /* tp_repr */
3359 0, /* tp_as_number */
3360 0, /* tp_as_sequence */
3361 0, /* tp_as_mapping */
3362 0, /* tp_hash */
3363 0, /* tp_call */
3364 0, /* tp_str */
3365 0, /* tp_getattro */
3366 0, /* tp_setattro */
3367 0, /* tp_as_buffer */
3368 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3369 basestring_doc, /* tp_doc */
3370 0, /* tp_traverse */
3371 0, /* tp_clear */
3372 0, /* tp_richcompare */
3373 0, /* tp_weaklistoffset */
3374 0, /* tp_iter */
3375 0, /* tp_iternext */
3376 0, /* tp_methods */
3377 0, /* tp_members */
3378 0, /* tp_getset */
3379 &PyBaseObject_Type, /* tp_base */
3380 0, /* tp_dict */
3381 0, /* tp_descr_get */
3382 0, /* tp_descr_set */
3383 0, /* tp_dictoffset */
3384 0, /* tp_init */
3385 0, /* tp_alloc */
3386 basestring_new, /* tp_new */
3387 0, /* tp_free */
3388};
3389
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003390PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003391"str(object) -> string\n\
3392\n\
3393Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003394If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003395
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003396PyTypeObject PyString_Type = {
3397 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003398 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003399 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003400 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003401 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003402 (destructor)string_dealloc, /* tp_dealloc */
3403 (printfunc)string_print, /* tp_print */
3404 0, /* tp_getattr */
3405 0, /* tp_setattr */
3406 0, /* tp_compare */
3407 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003408 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003409 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003410 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003411 (hashfunc)string_hash, /* tp_hash */
3412 0, /* tp_call */
3413 (reprfunc)string_str, /* tp_str */
3414 PyObject_GenericGetAttr, /* tp_getattro */
3415 0, /* tp_setattro */
3416 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003417 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3418 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003419 string_doc, /* tp_doc */
3420 0, /* tp_traverse */
3421 0, /* tp_clear */
3422 (richcmpfunc)string_richcompare, /* tp_richcompare */
3423 0, /* tp_weaklistoffset */
3424 0, /* tp_iter */
3425 0, /* tp_iternext */
3426 string_methods, /* tp_methods */
3427 0, /* tp_members */
3428 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003429 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003430 0, /* tp_dict */
3431 0, /* tp_descr_get */
3432 0, /* tp_descr_set */
3433 0, /* tp_dictoffset */
3434 0, /* tp_init */
3435 0, /* tp_alloc */
3436 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003437 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003438};
3439
3440void
Fred Drakeba096332000-07-09 07:04:36 +00003441PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003442{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003443 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003444 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003445 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003446 if (w == NULL || !PyString_Check(*pv)) {
3447 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003448 *pv = NULL;
3449 return;
3450 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003451 v = string_concat((PyStringObject *) *pv, w);
3452 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003453 *pv = v;
3454}
3455
Guido van Rossum013142a1994-08-30 08:19:36 +00003456void
Fred Drakeba096332000-07-09 07:04:36 +00003457PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003458{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003459 PyString_Concat(pv, w);
3460 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003461}
3462
3463
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003464/* The following function breaks the notion that strings are immutable:
3465 it changes the size of a string. We get away with this only if there
3466 is only one module referencing the object. You can also think of it
3467 as creating a new string object and destroying the old one, only
3468 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003469 already be known to some other part of the code...
3470 Note that if there's not enough memory to resize the string, the original
3471 string object at *pv is deallocated, *pv is set to NULL, an "out of
3472 memory" exception is set, and -1 is returned. Else (on success) 0 is
3473 returned, and the value in *pv may or may not be the same as on input.
3474 As always, an extra byte is allocated for a trailing \0 byte (newsize
3475 does *not* include that), and a trailing \0 byte is stored.
3476*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003477
3478int
Fred Drakeba096332000-07-09 07:04:36 +00003479_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003480{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003481 register PyObject *v;
3482 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003483 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003484 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003485 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003486 Py_DECREF(v);
3487 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003488 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003489 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003490 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003491 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003492 _Py_ForgetReference(v);
3493 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003494 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003495 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003496 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003497 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003498 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003499 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003500 _Py_NewReference(*pv);
3501 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003502 sv->ob_size = newsize;
3503 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003504 return 0;
3505}
Guido van Rossume5372401993-03-16 12:15:04 +00003506
3507/* Helpers for formatstring */
3508
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003510getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003511{
3512 int argidx = *p_argidx;
3513 if (argidx < arglen) {
3514 (*p_argidx)++;
3515 if (arglen < 0)
3516 return args;
3517 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003518 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003519 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003520 PyErr_SetString(PyExc_TypeError,
3521 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003522 return NULL;
3523}
3524
/* Format flag bits; each one corresponds to a flag character that may
 * appear in a conversion specification.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3537
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003538static int
Fred Drakeba096332000-07-09 07:04:36 +00003539formatfloat(char *buf, size_t buflen, int flags,
3540 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003541{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003542 /* fmt = '%#.' + `prec` + `type`
3543 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003544 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003545 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003546 x = PyFloat_AsDouble(v);
3547 if (x == -1.0 && PyErr_Occurred()) {
3548 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003549 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003550 }
Guido van Rossume5372401993-03-16 12:15:04 +00003551 if (prec < 0)
3552 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003553 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3554 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003555 /* Worst case length calc to ensure no buffer overrun:
3556
3557 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003558 fmt = %#.<prec>g
3559 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003560 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003561 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003562
3563 'f' formats:
3564 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3565 len = 1 + 50 + 1 + prec = 52 + prec
3566
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003567 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003568 always given), therefore increase the length by one.
3569
3570 */
3571 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3572 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003573 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003574 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003575 return -1;
3576 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003577 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3578 (flags&F_ALT) ? "#" : "",
3579 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003580 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003581 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003582}
3583
Tim Peters38fd5b62000-09-21 05:43:11 +00003584/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3585 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3586 * Python's regular ints.
3587 * Return value: a new PyString*, or NULL if error.
3588 * . *pbuf is set to point into it,
3589 * *plen set to the # of chars following that.
3590 * Caller must decref it when done using pbuf.
3591 * The string starting at *pbuf is of the form
3592 * "-"? ("0x" | "0X")? digit+
3593 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003594 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003595 * There will be at least prec digits, zero-filled on the left if
3596 * necessary to get that many.
3597 * val object to be converted
3598 * flags bitmask of format flags; only F_ALT is looked at
3599 * prec minimum number of digits; 0-fill on left if needed
3600 * type a character in [duoxX]; u acts the same as d
3601 *
3602 * CAUTION: o, x and X conversions on regular ints can never
3603 * produce a '-' sign, but can for Python's unbounded ints.
3604 */
3605PyObject*
3606_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3607 char **pbuf, int *plen)
3608{
3609 PyObject *result = NULL;
3610 char *buf;
3611 int i;
3612 int sign; /* 1 if '-', else 0 */
3613 int len; /* number of characters */
3614 int numdigits; /* len == numnondigits + numdigits */
3615 int numnondigits = 0;
3616
3617 switch (type) {
3618 case 'd':
3619 case 'u':
3620 result = val->ob_type->tp_str(val);
3621 break;
3622 case 'o':
3623 result = val->ob_type->tp_as_number->nb_oct(val);
3624 break;
3625 case 'x':
3626 case 'X':
3627 numnondigits = 2;
3628 result = val->ob_type->tp_as_number->nb_hex(val);
3629 break;
3630 default:
3631 assert(!"'type' not in [duoxX]");
3632 }
3633 if (!result)
3634 return NULL;
3635
3636 /* To modify the string in-place, there can only be one reference. */
3637 if (result->ob_refcnt != 1) {
3638 PyErr_BadInternalCall();
3639 return NULL;
3640 }
3641 buf = PyString_AsString(result);
3642 len = PyString_Size(result);
3643 if (buf[len-1] == 'L') {
3644 --len;
3645 buf[len] = '\0';
3646 }
3647 sign = buf[0] == '-';
3648 numnondigits += sign;
3649 numdigits = len - numnondigits;
3650 assert(numdigits > 0);
3651
Tim Petersfff53252001-04-12 18:38:48 +00003652 /* Get rid of base marker unless F_ALT */
3653 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003654 /* Need to skip 0x, 0X or 0. */
3655 int skipped = 0;
3656 switch (type) {
3657 case 'o':
3658 assert(buf[sign] == '0');
3659 /* If 0 is only digit, leave it alone. */
3660 if (numdigits > 1) {
3661 skipped = 1;
3662 --numdigits;
3663 }
3664 break;
3665 case 'x':
3666 case 'X':
3667 assert(buf[sign] == '0');
3668 assert(buf[sign + 1] == 'x');
3669 skipped = 2;
3670 numnondigits -= 2;
3671 break;
3672 }
3673 if (skipped) {
3674 buf += skipped;
3675 len -= skipped;
3676 if (sign)
3677 buf[0] = '-';
3678 }
3679 assert(len == numnondigits + numdigits);
3680 assert(numdigits > 0);
3681 }
3682
3683 /* Fill with leading zeroes to meet minimum width. */
3684 if (prec > numdigits) {
3685 PyObject *r1 = PyString_FromStringAndSize(NULL,
3686 numnondigits + prec);
3687 char *b1;
3688 if (!r1) {
3689 Py_DECREF(result);
3690 return NULL;
3691 }
3692 b1 = PyString_AS_STRING(r1);
3693 for (i = 0; i < numnondigits; ++i)
3694 *b1++ = *buf++;
3695 for (i = 0; i < prec - numdigits; i++)
3696 *b1++ = '0';
3697 for (i = 0; i < numdigits; i++)
3698 *b1++ = *buf++;
3699 *b1 = '\0';
3700 Py_DECREF(result);
3701 result = r1;
3702 buf = PyString_AS_STRING(result);
3703 len = numnondigits + prec;
3704 }
3705
3706 /* Fix up case for hex conversions. */
3707 switch (type) {
3708 case 'x':
3709 /* Need to convert all upper case letters to lower case. */
3710 for (i = 0; i < len; i++)
3711 if (buf[i] >= 'A' && buf[i] <= 'F')
3712 buf[i] += 'a'-'A';
3713 break;
3714 case 'X':
3715 /* Need to convert 0x to 0X (and -0x to -0X). */
3716 if (buf[sign + 1] == 'x')
3717 buf[sign + 1] = 'X';
3718 break;
3719 }
3720 *pbuf = buf;
3721 *plen = len;
3722 return result;
3723}
3724
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003725static int
Fred Drakeba096332000-07-09 07:04:36 +00003726formatint(char *buf, size_t buflen, int flags,
3727 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003728{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003729 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003730 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3731 + 1 + 1 = 24 */
3732 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003733 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003734 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003735
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003736 x = PyInt_AsLong(v);
3737 if (x == -1 && PyErr_Occurred()) {
3738 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003739 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003740 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003741 if (x < 0 && type == 'u') {
3742 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003743 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003744 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3745 sign = "-";
3746 else
3747 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003748 if (prec < 0)
3749 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003750
3751 if ((flags & F_ALT) &&
3752 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003753 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003754 * of issues that cause pain:
3755 * - when 0 is being converted, the C standard leaves off
3756 * the '0x' or '0X', which is inconsistent with other
3757 * %#x/%#X conversions and inconsistent with Python's
3758 * hex() function
3759 * - there are platforms that violate the standard and
3760 * convert 0 with the '0x' or '0X'
3761 * (Metrowerks, Compaq Tru64)
3762 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003763 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003764 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003765 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003766 * We can achieve the desired consistency by inserting our
3767 * own '0x' or '0X' prefix, and substituting %x/%X in place
3768 * of %#x/%#X.
3769 *
3770 * Note that this is the same approach as used in
3771 * formatint() in unicodeobject.c
3772 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003773 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3774 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003775 }
3776 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003777 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3778 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003779 prec, type);
3780 }
3781
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003782 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3783 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003784 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003785 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003786 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003787 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003788 return -1;
3789 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003790 if (sign[0])
3791 PyOS_snprintf(buf, buflen, fmt, -x);
3792 else
3793 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003794 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003795}
3796
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003797static int
Fred Drakeba096332000-07-09 07:04:36 +00003798formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003799{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003800 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003801 if (PyString_Check(v)) {
3802 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003803 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003804 }
3805 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003806 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003807 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003808 }
3809 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003810 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003811}
3812
Guido van Rossum013142a1994-08-30 08:19:36 +00003813
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003814/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3815
3816 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3817 chars are formatted. XXX This is a magic number. Each formatting
3818 routine does bounds checking to ensure no overflow, but a better
3819 solution may be to malloc a buffer of appropriate size for each
3820 format. For now, the current solution is sufficient.
3821*/
3822#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003823
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003824PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003825PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003826{
3827 char *fmt, *res;
3828 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003829 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003830 PyObject *result, *orig_args;
3831#ifdef Py_USING_UNICODE
3832 PyObject *v, *w;
3833#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003834 PyObject *dict = NULL;
3835 if (format == NULL || !PyString_Check(format) || args == NULL) {
3836 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003837 return NULL;
3838 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003839 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003840 fmt = PyString_AS_STRING(format);
3841 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003842 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003843 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003844 if (result == NULL)
3845 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003846 res = PyString_AsString(result);
3847 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003848 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003849 argidx = 0;
3850 }
3851 else {
3852 arglen = -1;
3853 argidx = -2;
3854 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003855 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3856 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003857 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003858 while (--fmtcnt >= 0) {
3859 if (*fmt != '%') {
3860 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003861 rescnt = fmtcnt + 100;
3862 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003863 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003864 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003865 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003866 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003867 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003868 }
3869 *res++ = *fmt++;
3870 }
3871 else {
3872 /* Got a format specifier */
3873 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003874 int width = -1;
3875 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003876 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003877 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003878 PyObject *v = NULL;
3879 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003880 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003881 int sign;
3882 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003883 char formatbuf[FORMATBUFLEN];
3884 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003885#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003886 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003887 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003888#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003889
Guido van Rossumda9c2711996-12-05 21:58:58 +00003890 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003891 if (*fmt == '(') {
3892 char *keystart;
3893 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003894 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003895 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003896
3897 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003898 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003899 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003900 goto error;
3901 }
3902 ++fmt;
3903 --fmtcnt;
3904 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003905 /* Skip over balanced parentheses */
3906 while (pcount > 0 && --fmtcnt >= 0) {
3907 if (*fmt == ')')
3908 --pcount;
3909 else if (*fmt == '(')
3910 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003911 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003912 }
3913 keylen = fmt - keystart - 1;
3914 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003915 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003916 "incomplete format key");
3917 goto error;
3918 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003919 key = PyString_FromStringAndSize(keystart,
3920 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003921 if (key == NULL)
3922 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003923 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003924 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003925 args_owned = 0;
3926 }
3927 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003928 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003929 if (args == NULL) {
3930 goto error;
3931 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003932 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003933 arglen = -1;
3934 argidx = -2;
3935 }
Guido van Rossume5372401993-03-16 12:15:04 +00003936 while (--fmtcnt >= 0) {
3937 switch (c = *fmt++) {
3938 case '-': flags |= F_LJUST; continue;
3939 case '+': flags |= F_SIGN; continue;
3940 case ' ': flags |= F_BLANK; continue;
3941 case '#': flags |= F_ALT; continue;
3942 case '0': flags |= F_ZERO; continue;
3943 }
3944 break;
3945 }
3946 if (c == '*') {
3947 v = getnextarg(args, arglen, &argidx);
3948 if (v == NULL)
3949 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003950 if (!PyInt_Check(v)) {
3951 PyErr_SetString(PyExc_TypeError,
3952 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003953 goto error;
3954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003955 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003956 if (width < 0) {
3957 flags |= F_LJUST;
3958 width = -width;
3959 }
Guido van Rossume5372401993-03-16 12:15:04 +00003960 if (--fmtcnt >= 0)
3961 c = *fmt++;
3962 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003963 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003964 width = c - '0';
3965 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003966 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003967 if (!isdigit(c))
3968 break;
3969 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003970 PyErr_SetString(
3971 PyExc_ValueError,
3972 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003973 goto error;
3974 }
3975 width = width*10 + (c - '0');
3976 }
3977 }
3978 if (c == '.') {
3979 prec = 0;
3980 if (--fmtcnt >= 0)
3981 c = *fmt++;
3982 if (c == '*') {
3983 v = getnextarg(args, arglen, &argidx);
3984 if (v == NULL)
3985 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003986 if (!PyInt_Check(v)) {
3987 PyErr_SetString(
3988 PyExc_TypeError,
3989 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003990 goto error;
3991 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003992 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003993 if (prec < 0)
3994 prec = 0;
3995 if (--fmtcnt >= 0)
3996 c = *fmt++;
3997 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003998 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003999 prec = c - '0';
4000 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004001 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004002 if (!isdigit(c))
4003 break;
4004 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004005 PyErr_SetString(
4006 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004007 "prec too big");
4008 goto error;
4009 }
4010 prec = prec*10 + (c - '0');
4011 }
4012 }
4013 } /* prec */
4014 if (fmtcnt >= 0) {
4015 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004016 if (--fmtcnt >= 0)
4017 c = *fmt++;
4018 }
4019 }
4020 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004021 PyErr_SetString(PyExc_ValueError,
4022 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004023 goto error;
4024 }
4025 if (c != '%') {
4026 v = getnextarg(args, arglen, &argidx);
4027 if (v == NULL)
4028 goto error;
4029 }
4030 sign = 0;
4031 fill = ' ';
4032 switch (c) {
4033 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004034 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004035 len = 1;
4036 break;
4037 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004038#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004039 if (PyUnicode_Check(v)) {
4040 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004041 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004042 goto unicode;
4043 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004044#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004045 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004046 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004047 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004048 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004049 else
4050 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004051 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004052 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004053 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004054 /* XXX Note: this should never happen,
4055 since PyObject_Repr() and
4056 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004057 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004058 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004059 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004060 goto error;
4061 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004062 pbuf = PyString_AS_STRING(temp);
4063 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004064 if (prec >= 0 && len > prec)
4065 len = prec;
4066 break;
4067 case 'i':
4068 case 'd':
4069 case 'u':
4070 case 'o':
4071 case 'x':
4072 case 'X':
4073 if (c == 'i')
4074 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004075 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004076 temp = _PyString_FormatLong(v, flags,
4077 prec, c, &pbuf, &len);
4078 if (!temp)
4079 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004080 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004081 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004082 else {
4083 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004084 len = formatint(pbuf,
4085 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004086 flags, prec, c, v);
4087 if (len < 0)
4088 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004089 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004090 }
4091 if (flags & F_ZERO)
4092 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004093 break;
4094 case 'e':
4095 case 'E':
4096 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004097 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004098 case 'g':
4099 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004100 if (c == 'F')
4101 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004102 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004103 len = formatfloat(pbuf, sizeof(formatbuf),
4104 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004105 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004106 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004107 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004108 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004109 fill = '0';
4110 break;
4111 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004112#ifdef Py_USING_UNICODE
4113 if (PyUnicode_Check(v)) {
4114 fmt = fmt_start;
4115 argidx = argidx_start;
4116 goto unicode;
4117 }
4118#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004119 pbuf = formatbuf;
4120 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004121 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004122 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004123 break;
4124 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004125 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004126 "unsupported format character '%c' (0x%x) "
4127 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004128 c, c,
4129 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004130 goto error;
4131 }
4132 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004133 if (*pbuf == '-' || *pbuf == '+') {
4134 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004135 len--;
4136 }
4137 else if (flags & F_SIGN)
4138 sign = '+';
4139 else if (flags & F_BLANK)
4140 sign = ' ';
4141 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004142 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004143 }
4144 if (width < len)
4145 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004146 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004147 reslen -= rescnt;
4148 rescnt = width + fmtcnt + 100;
4149 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004150 if (reslen < 0) {
4151 Py_DECREF(result);
4152 return PyErr_NoMemory();
4153 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004154 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004155 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004156 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004157 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004158 }
4159 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004160 if (fill != ' ')
4161 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004162 rescnt--;
4163 if (width > len)
4164 width--;
4165 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004166 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4167 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004168 assert(pbuf[1] == c);
4169 if (fill != ' ') {
4170 *res++ = *pbuf++;
4171 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004172 }
Tim Petersfff53252001-04-12 18:38:48 +00004173 rescnt -= 2;
4174 width -= 2;
4175 if (width < 0)
4176 width = 0;
4177 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004178 }
4179 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004180 do {
4181 --rescnt;
4182 *res++ = fill;
4183 } while (--width > len);
4184 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004185 if (fill == ' ') {
4186 if (sign)
4187 *res++ = sign;
4188 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004189 (c == 'x' || c == 'X')) {
4190 assert(pbuf[0] == '0');
4191 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004192 *res++ = *pbuf++;
4193 *res++ = *pbuf++;
4194 }
4195 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004196 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004197 res += len;
4198 rescnt -= len;
4199 while (--width >= len) {
4200 --rescnt;
4201 *res++ = ' ';
4202 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004203 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004204 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004205 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004206 goto error;
4207 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004208 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004209 } /* '%' */
4210 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004211 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004212 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004213 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004214 goto error;
4215 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004216 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004217 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004218 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004219 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004220 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004221
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004222#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004223 unicode:
4224 if (args_owned) {
4225 Py_DECREF(args);
4226 args_owned = 0;
4227 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004228 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004229 if (PyTuple_Check(orig_args) && argidx > 0) {
4230 PyObject *v;
4231 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4232 v = PyTuple_New(n);
4233 if (v == NULL)
4234 goto error;
4235 while (--n >= 0) {
4236 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4237 Py_INCREF(w);
4238 PyTuple_SET_ITEM(v, n, w);
4239 }
4240 args = v;
4241 } else {
4242 Py_INCREF(orig_args);
4243 args = orig_args;
4244 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004245 args_owned = 1;
4246 /* Take what we have of the result and let the Unicode formatting
4247 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004248 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004249 if (_PyString_Resize(&result, rescnt))
4250 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004251 fmtcnt = PyString_GET_SIZE(format) - \
4252 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004253 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4254 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004255 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004256 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004257 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004258 if (v == NULL)
4259 goto error;
4260 /* Paste what we have (result) to what the Unicode formatting
4261 function returned (v) and return the result (or error) */
4262 w = PyUnicode_Concat(result, v);
4263 Py_DECREF(result);
4264 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004265 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004266 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004267#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004268
Guido van Rossume5372401993-03-16 12:15:04 +00004269 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004270 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004271 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004272 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004273 }
Guido van Rossume5372401993-03-16 12:15:04 +00004274 return NULL;
4275}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004276
Guido van Rossum2a61e741997-01-18 07:55:05 +00004277void
Fred Drakeba096332000-07-09 07:04:36 +00004278PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004279{
4280 register PyStringObject *s = (PyStringObject *)(*p);
4281 PyObject *t;
4282 if (s == NULL || !PyString_Check(s))
4283 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004284 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004285 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004286 if (interned == NULL) {
4287 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004288 if (interned == NULL) {
4289 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004290 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004291 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004292 }
4293 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4294 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004295 Py_DECREF(*p);
4296 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004297 return;
4298 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004299 /* Ensure that only true string objects appear in the intern dict */
4300 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004301 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4302 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004303 if (t == NULL) {
4304 PyErr_Clear();
4305 return;
Tim Peters111f6092001-09-12 07:54:51 +00004306 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004307 } else {
4308 t = (PyObject*) s;
4309 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004310 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004311
4312 if (PyDict_SetItem(interned, t, t) == 0) {
4313 /* The two references in interned are not counted by
4314 refcnt. The string deallocator will take care of this */
4315 ((PyObject *)t)->ob_refcnt-=2;
4316 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4317 Py_DECREF(*p);
4318 *p = t;
4319 return;
4320 }
4321 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004322 PyErr_Clear();
4323}
4324
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004325void
4326PyString_InternImmortal(PyObject **p)
4327{
4328 PyString_InternInPlace(p);
4329 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4330 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4331 Py_INCREF(*p);
4332 }
4333}
4334
Guido van Rossum2a61e741997-01-18 07:55:05 +00004335
4336PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004337PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004338{
4339 PyObject *s = PyString_FromString(cp);
4340 if (s == NULL)
4341 return NULL;
4342 PyString_InternInPlace(&s);
4343 return s;
4344}
4345
Guido van Rossum8cf04761997-08-02 02:57:45 +00004346void
Fred Drakeba096332000-07-09 07:04:36 +00004347PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004348{
4349 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004350 for (i = 0; i < UCHAR_MAX + 1; i++) {
4351 Py_XDECREF(characters[i]);
4352 characters[i] = NULL;
4353 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004354 Py_XDECREF(nullstring);
4355 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004356}
Barry Warsawa903ad982001-02-23 16:40:48 +00004357
Barry Warsawa903ad982001-02-23 16:40:48 +00004358void _Py_ReleaseInternedStrings(void)
4359{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004360 PyObject *keys;
4361 PyStringObject *s;
4362 int i, n;
4363
4364 if (interned == NULL || !PyDict_Check(interned))
4365 return;
4366 keys = PyDict_Keys(interned);
4367 if (keys == NULL || !PyList_Check(keys)) {
4368 PyErr_Clear();
4369 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004370 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004371
4372 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4373 detector, interned strings are not forcibly deallocated; rather, we
4374 give them their stolen references back, and then clear and DECREF
4375 the interned dict. */
4376
4377 fprintf(stderr, "releasing interned strings\n");
4378 n = PyList_GET_SIZE(keys);
4379 for (i = 0; i < n; i++) {
4380 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4381 switch (s->ob_sstate) {
4382 case SSTATE_NOT_INTERNED:
4383 /* XXX Shouldn't happen */
4384 break;
4385 case SSTATE_INTERNED_IMMORTAL:
4386 s->ob_refcnt += 1;
4387 break;
4388 case SSTATE_INTERNED_MORTAL:
4389 s->ob_refcnt += 2;
4390 break;
4391 default:
4392 Py_FatalError("Inconsistent interned string state.");
4393 }
4394 s->ob_sstate = SSTATE_NOT_INTERNED;
4395 }
4396 Py_DECREF(keys);
4397 PyDict_Clear(interned);
4398 Py_DECREF(interned);
4399 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004400}