blob: 866e7e84ba5675f7ea0a876b6804ca2f1c87bb7f [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000072 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000073 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000074 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000076 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000078 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (str != NULL)
80 memcpy(op->ob_sval, str, size);
81 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000082 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000090 PyObject *t = (PyObject *)op;
91 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000092 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097}
98
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000100PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000101{
Tim Peters62de65b2001-12-06 20:29:32 +0000102 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000103 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000104
105 assert(str != NULL);
106 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000107 if (size > INT_MAX) {
108 PyErr_SetString(PyExc_OverflowError,
109 "string is too long for a Python string");
110 return NULL;
111 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0 && (op = nullstring) != NULL) {
113#ifdef COUNT_ALLOCS
114 null_strings++;
115#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000116 Py_INCREF(op);
117 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
119 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
120#ifdef COUNT_ALLOCS
121 one_strings++;
122#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000127 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000128 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000133 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000134 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000135 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000137 PyObject *t = (PyObject *)op;
138 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000139 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Barry Warsawdadace02001-08-24 18:32:06 +0000152PyObject *
153PyString_FromFormatV(const char *format, va_list vargs)
154{
Tim Petersc15c4f12001-10-02 21:32:07 +0000155 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 int n = 0;
157 const char* f;
158 char *s;
159 PyObject* string;
160
Tim Petersc15c4f12001-10-02 21:32:07 +0000161#ifdef VA_LIST_IS_ARRAY
162 memcpy(count, vargs, sizeof(va_list));
163#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000164#ifdef __va_copy
165 __va_copy(count, vargs);
166#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000167 count = vargs;
168#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000169#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000170 /* step 1: figure out how large a buffer we need */
171 for (f = format; *f; f++) {
172 if (*f == '%') {
173 const char* p = f;
174 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
175 ;
176
177 /* skip the 'l' in %ld, since it doesn't change the
178 width. although only %d is supported (see
179 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000180 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000181 if (*f == 'l' && *(f+1) == 'd')
182 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000183
Barry Warsawdadace02001-08-24 18:32:06 +0000184 switch (*f) {
185 case 'c':
186 (void)va_arg(count, int);
187 /* fall through... */
188 case '%':
189 n++;
190 break;
191 case 'd': case 'i': case 'x':
192 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000193 /* 20 bytes is enough to hold a 64-bit
194 integer. Decimal takes the most space.
195 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000196 n += 20;
197 break;
198 case 's':
199 s = va_arg(count, char*);
200 n += strlen(s);
201 break;
202 case 'p':
203 (void) va_arg(count, int);
204 /* maximum 64-bit pointer representation:
205 * 0xffffffffffffffff
206 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000217 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000226 /* Since we've analyzed how much space we need for the worst case,
227 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000228 string = PyString_FromStringAndSize(NULL, n);
229 if (!string)
230 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231
Barry Warsawdadace02001-08-24 18:32:06 +0000232 s = PyString_AsString(string);
233
234 for (f = format; *f; f++) {
235 if (*f == '%') {
236 const char* p = f++;
237 int i, longflag = 0;
238 /* parse the width.precision part (we're only
239 interested in the precision value, if any) */
240 n = 0;
241 while (isdigit(Py_CHARMASK(*f)))
242 n = (n*10) + *f++ - '0';
243 if (*f == '.') {
244 f++;
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 }
249 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
250 f++;
251 /* handle the long flag, but only for %ld. others
252 can be added when necessary. */
253 if (*f == 'l' && *(f+1) == 'd') {
254 longflag = 1;
255 ++f;
256 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000257
Barry Warsawdadace02001-08-24 18:32:06 +0000258 switch (*f) {
259 case 'c':
260 *s++ = va_arg(vargs, int);
261 break;
262 case 'd':
263 if (longflag)
264 sprintf(s, "%ld", va_arg(vargs, long));
265 else
266 sprintf(s, "%d", va_arg(vargs, int));
267 s += strlen(s);
268 break;
269 case 'i':
270 sprintf(s, "%i", va_arg(vargs, int));
271 s += strlen(s);
272 break;
273 case 'x':
274 sprintf(s, "%x", va_arg(vargs, int));
275 s += strlen(s);
276 break;
277 case 's':
278 p = va_arg(vargs, char*);
279 i = strlen(p);
280 if (n > 0 && i > n)
281 i = n;
282 memcpy(s, p, i);
283 s += i;
284 break;
285 case 'p':
286 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000287 /* %p is ill-defined: ensure leading 0x. */
288 if (s[1] == 'X')
289 s[1] = 'x';
290 else if (s[1] != 'x') {
291 memmove(s+2, s, strlen(s)+1);
292 s[0] = '0';
293 s[1] = 'x';
294 }
Barry Warsawdadace02001-08-24 18:32:06 +0000295 s += strlen(s);
296 break;
297 case '%':
298 *s++ = '%';
299 break;
300 default:
301 strcpy(s, p);
302 s += strlen(s);
303 goto end;
304 }
305 } else
306 *s++ = *f;
307 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000308
Barry Warsawdadace02001-08-24 18:32:06 +0000309 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000311 return string;
312}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000313
Barry Warsawdadace02001-08-24 18:32:06 +0000314PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000315PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000316{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000317 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000318 va_list vargs;
319
320#ifdef HAVE_STDARG_PROTOTYPES
321 va_start(vargs, format);
322#else
323 va_start(vargs);
324#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000325 ret = PyString_FromFormatV(format, vargs);
326 va_end(vargs);
327 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000328}
329
330
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000331PyObject *PyString_Decode(const char *s,
332 int size,
333 const char *encoding,
334 const char *errors)
335{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000336 PyObject *v, *str;
337
338 str = PyString_FromStringAndSize(s, size);
339 if (str == NULL)
340 return NULL;
341 v = PyString_AsDecodedString(str, encoding, errors);
342 Py_DECREF(str);
343 return v;
344}
345
346PyObject *PyString_AsDecodedObject(PyObject *str,
347 const char *encoding,
348 const char *errors)
349{
350 PyObject *v;
351
352 if (!PyString_Check(str)) {
353 PyErr_BadArgument();
354 goto onError;
355 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000356
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000357 if (encoding == NULL) {
358#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000360#else
361 PyErr_SetString(PyExc_ValueError, "no encoding specified");
362 goto onError;
363#endif
364 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365
366 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000367 v = PyCodec_Decode(str, encoding, errors);
368 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000370
371 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000372
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000373 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000374 return NULL;
375}
376
377PyObject *PyString_AsDecodedString(PyObject *str,
378 const char *encoding,
379 const char *errors)
380{
381 PyObject *v;
382
383 v = PyString_AsDecodedObject(str, encoding, errors);
384 if (v == NULL)
385 goto onError;
386
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 /* Convert Unicode to a string using the default encoding */
389 if (PyUnicode_Check(v)) {
390 PyObject *temp = v;
391 v = PyUnicode_AsEncodedString(v, NULL, NULL);
392 Py_DECREF(temp);
393 if (v == NULL)
394 goto onError;
395 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 if (!PyString_Check(v)) {
398 PyErr_Format(PyExc_TypeError,
399 "decoder did not return a string object (type=%.400s)",
400 v->ob_type->tp_name);
401 Py_DECREF(v);
402 goto onError;
403 }
404
405 return v;
406
407 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000408 return NULL;
409}
410
411PyObject *PyString_Encode(const char *s,
412 int size,
413 const char *encoding,
414 const char *errors)
415{
416 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000417
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 str = PyString_FromStringAndSize(s, size);
419 if (str == NULL)
420 return NULL;
421 v = PyString_AsEncodedString(str, encoding, errors);
422 Py_DECREF(str);
423 return v;
424}
425
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000426PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 const char *encoding,
428 const char *errors)
429{
430 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000431
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 if (!PyString_Check(str)) {
433 PyErr_BadArgument();
434 goto onError;
435 }
436
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000437 if (encoding == NULL) {
438#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000439 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000440#else
441 PyErr_SetString(PyExc_ValueError, "no encoding specified");
442 goto onError;
443#endif
444 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445
446 /* Encode via the codec registry */
447 v = PyCodec_Encode(str, encoding, errors);
448 if (v == NULL)
449 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450
451 return v;
452
453 onError:
454 return NULL;
455}
456
457PyObject *PyString_AsEncodedString(PyObject *str,
458 const char *encoding,
459 const char *errors)
460{
461 PyObject *v;
462
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000463 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000464 if (v == NULL)
465 goto onError;
466
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 /* Convert Unicode to a string using the default encoding */
469 if (PyUnicode_Check(v)) {
470 PyObject *temp = v;
471 v = PyUnicode_AsEncodedString(v, NULL, NULL);
472 Py_DECREF(temp);
473 if (v == NULL)
474 goto onError;
475 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000477 if (!PyString_Check(v)) {
478 PyErr_Format(PyExc_TypeError,
479 "encoder did not return a string object (type=%.400s)",
480 v->ob_type->tp_name);
481 Py_DECREF(v);
482 goto onError;
483 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000484
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000485 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 onError:
488 return NULL;
489}
490
Guido van Rossum234f9421993-06-17 12:35:49 +0000491static void
Fred Drakeba096332000-07-09 07:04:36 +0000492string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000493{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000494 switch (PyString_CHECK_INTERNED(op)) {
495 case SSTATE_NOT_INTERNED:
496 break;
497
498 case SSTATE_INTERNED_MORTAL:
499 /* revive dead object temporarily for DelItem */
500 op->ob_refcnt = 3;
501 if (PyDict_DelItem(interned, op) != 0)
502 Py_FatalError(
503 "deletion of interned string failed");
504 break;
505
506 case SSTATE_INTERNED_IMMORTAL:
507 Py_FatalError("Immortal interned string died.");
508
509 default:
510 Py_FatalError("Inconsistent interned string state.");
511 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000512 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000513}
514
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000515/* Unescape a backslash-escaped string. If unicode is non-zero,
516 the string is a u-literal. If recode_encoding is non-zero,
517 the string is UTF-8 encoded and should be re-encoded in the
518 specified encoding. */
519
520PyObject *PyString_DecodeEscape(const char *s,
521 int len,
522 const char *errors,
523 int unicode,
524 const char *recode_encoding)
525{
526 int c;
527 char *p, *buf;
528 const char *end;
529 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000530 int newlen = recode_encoding ? 4*len:len;
531 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 if (v == NULL)
533 return NULL;
534 p = buf = PyString_AsString(v);
535 end = s + len;
536 while (s < end) {
537 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000538 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539#ifdef Py_USING_UNICODE
540 if (recode_encoding && (*s & 0x80)) {
541 PyObject *u, *w;
542 char *r;
543 const char* t;
544 int rn;
545 t = s;
546 /* Decode non-ASCII bytes as UTF-8. */
547 while (t < end && (*t & 0x80)) t++;
548 u = PyUnicode_DecodeUTF8(s, t - s, errors);
549 if(!u) goto failed;
550
551 /* Recode them in target encoding. */
552 w = PyUnicode_AsEncodedString(
553 u, recode_encoding, errors);
554 Py_DECREF(u);
555 if (!w) goto failed;
556
557 /* Append bytes to output buffer. */
558 r = PyString_AsString(w);
559 rn = PyString_Size(w);
560 memcpy(p, r, rn);
561 p += rn;
562 Py_DECREF(w);
563 s = t;
564 } else {
565 *p++ = *s++;
566 }
567#else
568 *p++ = *s++;
569#endif
570 continue;
571 }
572 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000573 if (s==end) {
574 PyErr_SetString(PyExc_ValueError,
575 "Trailing \\ in string");
576 goto failed;
577 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000578 switch (*s++) {
579 /* XXX This assumes ASCII! */
580 case '\n': break;
581 case '\\': *p++ = '\\'; break;
582 case '\'': *p++ = '\''; break;
583 case '\"': *p++ = '\"'; break;
584 case 'b': *p++ = '\b'; break;
585 case 'f': *p++ = '\014'; break; /* FF */
586 case 't': *p++ = '\t'; break;
587 case 'n': *p++ = '\n'; break;
588 case 'r': *p++ = '\r'; break;
589 case 'v': *p++ = '\013'; break; /* VT */
590 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
591 case '0': case '1': case '2': case '3':
592 case '4': case '5': case '6': case '7':
593 c = s[-1] - '0';
594 if ('0' <= *s && *s <= '7') {
595 c = (c<<3) + *s++ - '0';
596 if ('0' <= *s && *s <= '7')
597 c = (c<<3) + *s++ - '0';
598 }
599 *p++ = c;
600 break;
601 case 'x':
602 if (isxdigit(Py_CHARMASK(s[0]))
603 && isxdigit(Py_CHARMASK(s[1]))) {
604 unsigned int x = 0;
605 c = Py_CHARMASK(*s);
606 s++;
607 if (isdigit(c))
608 x = c - '0';
609 else if (islower(c))
610 x = 10 + c - 'a';
611 else
612 x = 10 + c - 'A';
613 x = x << 4;
614 c = Py_CHARMASK(*s);
615 s++;
616 if (isdigit(c))
617 x += c - '0';
618 else if (islower(c))
619 x += 10 + c - 'a';
620 else
621 x += 10 + c - 'A';
622 *p++ = x;
623 break;
624 }
625 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 PyErr_SetString(PyExc_ValueError,
627 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000628 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 }
630 if (strcmp(errors, "replace") == 0) {
631 *p++ = '?';
632 } else if (strcmp(errors, "ignore") == 0)
633 /* do nothing */;
634 else {
635 PyErr_Format(PyExc_ValueError,
636 "decoding error; "
637 "unknown error handling code: %.400s",
638 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000639 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 }
641#ifndef Py_USING_UNICODE
642 case 'u':
643 case 'U':
644 case 'N':
645 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000646 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 "Unicode escapes not legal "
648 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000649 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000650 }
651#endif
652 default:
653 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000654 s--;
655 goto non_esc; /* an arbitry number of unescaped
656 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 }
658 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000659 if (p-buf < newlen)
660 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000661 return v;
662 failed:
663 Py_DECREF(v);
664 return NULL;
665}
666
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667static int
668string_getsize(register PyObject *op)
669{
670 char *s;
671 int len;
672 if (PyString_AsStringAndSize(op, &s, &len))
673 return -1;
674 return len;
675}
676
677static /*const*/ char *
678string_getbuffer(register PyObject *op)
679{
680 char *s;
681 int len;
682 if (PyString_AsStringAndSize(op, &s, &len))
683 return NULL;
684 return s;
685}
686
Guido van Rossumd7047b31995-01-02 19:07:15 +0000687int
Fred Drakeba096332000-07-09 07:04:36 +0000688PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000690 if (!PyString_Check(op))
691 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693}
694
695/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000696PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (!PyString_Check(op))
699 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000700 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701}
702
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000703int
704PyString_AsStringAndSize(register PyObject *obj,
705 register char **s,
706 register int *len)
707{
708 if (s == NULL) {
709 PyErr_BadInternalCall();
710 return -1;
711 }
712
713 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000714#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715 if (PyUnicode_Check(obj)) {
716 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
717 if (obj == NULL)
718 return -1;
719 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000720 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000721#endif
722 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723 PyErr_Format(PyExc_TypeError,
724 "expected string or Unicode object, "
725 "%.200s found", obj->ob_type->tp_name);
726 return -1;
727 }
728 }
729
730 *s = PyString_AS_STRING(obj);
731 if (len != NULL)
732 *len = PyString_GET_SIZE(obj);
733 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
734 PyErr_SetString(PyExc_TypeError,
735 "expected string without null bytes");
736 return -1;
737 }
738 return 0;
739}
740
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741/* Methods */
742
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000743static int
Fred Drakeba096332000-07-09 07:04:36 +0000744string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745{
746 int i;
747 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000748 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000749
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000750 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000751 if (! PyString_CheckExact(op)) {
752 int ret;
753 /* A str subclass may have its own __str__ method. */
754 op = (PyStringObject *) PyObject_Str((PyObject *)op);
755 if (op == NULL)
756 return -1;
757 ret = string_print(op, fp, flags);
758 Py_DECREF(op);
759 return ret;
760 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000761 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000762#ifdef __VMS
763 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
764#else
765 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
766#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000767 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000769
Thomas Wouters7e474022000-07-16 12:04:32 +0000770 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000772 if (memchr(op->ob_sval, '\'', op->ob_size) &&
773 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000774 quote = '"';
775
776 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 for (i = 0; i < op->ob_size; i++) {
778 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000779 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000780 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000781 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\r");
787 else if (c < ' ' || c >= 0x7f)
788 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000789 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794}
795
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000796PyObject *
797PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000799 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000800 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000801 PyObject *v;
802 if (newsize > INT_MAX) {
803 PyErr_SetString(PyExc_OverflowError,
804 "string is too large to make repr");
805 }
806 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000808 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 }
810 else {
811 register int i;
812 register char c;
813 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 int quote;
815
Thomas Wouters7e474022000-07-16 12:04:32 +0000816 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000818 if (smartquotes &&
819 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000820 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000821 quote = '"';
822
Tim Peters9161c8b2001-12-03 01:55:38 +0000823 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000826 /* There's at least enough room for a hex escape
827 and a closing quote. */
828 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000832 else if (c == '\t')
833 *p++ = '\\', *p++ = 't';
834 else if (c == '\n')
835 *p++ = '\\', *p++ = 'n';
836 else if (c == '\r')
837 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000838 else if (c < ' ' || c >= 0x7f) {
839 /* For performance, we don't want to call
840 PyOS_snprintf here (extra layers of
841 function call). */
842 sprintf(p, "\\x%02x", c & 0xff);
843 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000844 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 else
846 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000848 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000851 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000852 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000853 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855}
856
Guido van Rossum189f1df2001-05-01 16:51:53 +0000857static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858string_repr(PyObject *op)
859{
860 return PyString_Repr(op, 1);
861}
862
863static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000864string_str(PyObject *s)
865{
Tim Petersc9933152001-10-16 20:18:24 +0000866 assert(PyString_Check(s));
867 if (PyString_CheckExact(s)) {
868 Py_INCREF(s);
869 return s;
870 }
871 else {
872 /* Subtype -- return genuine string with the same value. */
873 PyStringObject *t = (PyStringObject *) s;
874 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
875 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000876}
877
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878static int
Fred Drakeba096332000-07-09 07:04:36 +0000879string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880{
881 return a->ob_size;
882}
883
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000884static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000885string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000888 register PyStringObject *op;
889 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000890#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 if (PyUnicode_Check(bb))
892 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000893#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000894 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000895 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000896 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 return NULL;
898 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000899#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000901 if ((a->ob_size == 0 || b->ob_size == 0) &&
902 PyString_CheckExact(a) && PyString_CheckExact(b)) {
903 if (a->ob_size == 0) {
904 Py_INCREF(bb);
905 return bb;
906 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 Py_INCREF(a);
908 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
910 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000911 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000912 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000915 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000917 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
919 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
920 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922#undef b
923}
924
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000926string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000929 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000930 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000932 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933 if (n < 0)
934 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000935 /* watch out for overflows: the size can overflow int,
936 * and the # of bytes needed can overflow size_t
937 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000938 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000939 if (n && size / n != a->ob_size) {
940 PyErr_SetString(PyExc_OverflowError,
941 "repeated string is too long");
942 return NULL;
943 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000944 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 Py_INCREF(a);
946 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947 }
Tim Peterse7c05322004-06-27 17:24:49 +0000948 nbytes = (size_t)size;
949 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000950 PyErr_SetString(PyExc_OverflowError,
951 "repeated string is too long");
952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000955 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000961 op->ob_sval[size] = '\0';
962 if (a->ob_size == 1 && n > 0) {
963 memset(op->ob_sval, a->ob_sval[0] , n);
964 return (PyObject *) op;
965 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000966 i = 0;
967 if (i < size) {
968 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
969 i = (int) a->ob_size;
970 }
971 while (i < size) {
972 j = (i <= size-i) ? i : size-i;
973 memcpy(op->ob_sval+i, op->ob_sval, j);
974 i += j;
975 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977}
978
979/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
980
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000982string_slice(register PyStringObject *a, register int i, register int j)
983 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984{
985 if (i < 0)
986 i = 0;
987 if (j < 0)
988 j = 0; /* Avoid signed/unsigned bug in next line */
989 if (j > a->ob_size)
990 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000991 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
992 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 Py_INCREF(a);
994 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000995 }
996 if (j < i)
997 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999}
1000
Guido van Rossum9284a572000-03-07 15:53:43 +00001001static int
Fred Drakeba096332000-07-09 07:04:36 +00001002string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001003{
Barry Warsaw817918c2002-08-06 16:58:21 +00001004 const char *lhs, *rhs, *end;
1005 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001006
1007 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001008#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001009 if (PyUnicode_Check(el))
1010 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001011#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001012 if (!PyString_Check(el)) {
1013 PyErr_SetString(PyExc_TypeError,
1014 "'in <string>' requires string as left operand");
1015 return -1;
1016 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001017 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001018 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001019 rhs = PyString_AS_STRING(el);
1020 lhs = PyString_AS_STRING(a);
1021
1022 /* optimize for a single character */
1023 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001024 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001025
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 while (lhs <= end) {
1028 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001029 return 1;
1030 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001031
Guido van Rossum9284a572000-03-07 15:53:43 +00001032 return 0;
1033}
1034
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001036string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001037{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001039 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001041 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042 return NULL;
1043 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001044 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001045 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001046 if (v == NULL)
1047 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001048 else {
1049#ifdef COUNT_ALLOCS
1050 one_strings++;
1051#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001052 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001053 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001054 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001055}
1056
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057static PyObject*
1058string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001060 int c;
1061 int len_a, len_b;
1062 int min_len;
1063 PyObject *result;
1064
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001065 /* Make sure both arguments are strings. */
1066 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001067 result = Py_NotImplemented;
1068 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001069 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001070 if (a == b) {
1071 switch (op) {
1072 case Py_EQ:case Py_LE:case Py_GE:
1073 result = Py_True;
1074 goto out;
1075 case Py_NE:case Py_LT:case Py_GT:
1076 result = Py_False;
1077 goto out;
1078 }
1079 }
1080 if (op == Py_EQ) {
1081 /* Supporting Py_NE here as well does not save
1082 much time, since Py_NE is rarely used. */
1083 if (a->ob_size == b->ob_size
1084 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001085 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001086 a->ob_size) == 0)) {
1087 result = Py_True;
1088 } else {
1089 result = Py_False;
1090 }
1091 goto out;
1092 }
1093 len_a = a->ob_size; len_b = b->ob_size;
1094 min_len = (len_a < len_b) ? len_a : len_b;
1095 if (min_len > 0) {
1096 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1097 if (c==0)
1098 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1099 }else
1100 c = 0;
1101 if (c == 0)
1102 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1103 switch (op) {
1104 case Py_LT: c = c < 0; break;
1105 case Py_LE: c = c <= 0; break;
1106 case Py_EQ: assert(0); break; /* unreachable */
1107 case Py_NE: c = c != 0; break;
1108 case Py_GT: c = c > 0; break;
1109 case Py_GE: c = c >= 0; break;
1110 default:
1111 result = Py_NotImplemented;
1112 goto out;
1113 }
1114 result = c ? Py_True : Py_False;
1115 out:
1116 Py_INCREF(result);
1117 return result;
1118}
1119
1120int
1121_PyString_Eq(PyObject *o1, PyObject *o2)
1122{
1123 PyStringObject *a, *b;
1124 a = (PyStringObject*)o1;
1125 b = (PyStringObject*)o2;
1126 return a->ob_size == b->ob_size
1127 && *a->ob_sval == *b->ob_sval
1128 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001129}
1130
Guido van Rossum9bfef441993-03-29 10:43:31 +00001131static long
Fred Drakeba096332000-07-09 07:04:36 +00001132string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001133{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001134 register int len;
1135 register unsigned char *p;
1136 register long x;
1137
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001138 if (a->ob_shash != -1)
1139 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001140 len = a->ob_size;
1141 p = (unsigned char *) a->ob_sval;
1142 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001143 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001144 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145 x ^= a->ob_size;
1146 if (x == -1)
1147 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001149 return x;
1150}
1151
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001152static PyObject*
1153string_subscript(PyStringObject* self, PyObject* item)
1154{
1155 if (PyInt_Check(item)) {
1156 long i = PyInt_AS_LONG(item);
1157 if (i < 0)
1158 i += PyString_GET_SIZE(self);
1159 return string_item(self,i);
1160 }
1161 else if (PyLong_Check(item)) {
1162 long i = PyLong_AsLong(item);
1163 if (i == -1 && PyErr_Occurred())
1164 return NULL;
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PySlice_Check(item)) {
1170 int start, stop, step, slicelength, cur, i;
1171 char* source_buf;
1172 char* result_buf;
1173 PyObject* result;
1174
1175 if (PySlice_GetIndicesEx((PySliceObject*)item,
1176 PyString_GET_SIZE(self),
1177 &start, &stop, &step, &slicelength) < 0) {
1178 return NULL;
1179 }
1180
1181 if (slicelength <= 0) {
1182 return PyString_FromStringAndSize("", 0);
1183 }
1184 else {
1185 source_buf = PyString_AsString((PyObject*)self);
1186 result_buf = PyMem_Malloc(slicelength);
1187
1188 for (cur = start, i = 0; i < slicelength;
1189 cur += step, i++) {
1190 result_buf[i] = source_buf[cur];
1191 }
1192
1193 result = PyString_FromStringAndSize(result_buf,
1194 slicelength);
1195 PyMem_Free(result_buf);
1196 return result;
1197 }
1198 }
1199 else {
1200 PyErr_SetString(PyExc_TypeError,
1201 "string indices must be integers");
1202 return NULL;
1203 }
1204}
1205
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001206static int
Fred Drakeba096332000-07-09 07:04:36 +00001207string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001208{
1209 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001210 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001211 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212 return -1;
1213 }
1214 *ptr = (void *)self->ob_sval;
1215 return self->ob_size;
1216}
1217
1218static int
Fred Drakeba096332000-07-09 07:04:36 +00001219string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220{
Guido van Rossum045e6881997-09-08 18:30:11 +00001221 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001222 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001223 return -1;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
1229 if ( lenp )
1230 *lenp = self->ob_size;
1231 return 1;
1232}
1233
Guido van Rossum1db70701998-10-08 02:18:52 +00001234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001236{
1237 if ( index != 0 ) {
1238 PyErr_SetString(PyExc_SystemError,
1239 "accessing non-existent string segment");
1240 return -1;
1241 }
1242 *ptr = self->ob_sval;
1243 return self->ob_size;
1244}
1245
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001246static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001247 (inquiry)string_length, /*sq_length*/
1248 (binaryfunc)string_concat, /*sq_concat*/
1249 (intargfunc)string_repeat, /*sq_repeat*/
1250 (intargfunc)string_item, /*sq_item*/
1251 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001252 0, /*sq_ass_item*/
1253 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001254 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001255};
1256
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001257static PyMappingMethods string_as_mapping = {
1258 (inquiry)string_length,
1259 (binaryfunc)string_subscript,
1260 0,
1261};
1262
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263static PyBufferProcs string_as_buffer = {
1264 (getreadbufferproc)string_buffer_getreadbuf,
1265 (getwritebufferproc)string_buffer_getwritebuf,
1266 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001267 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268};
1269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270
1271
/* Strip kinds; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* The bare name for a strip kind: the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001280
/* Append the substring data[left:right] to `list' as a new string.

   Expects the enclosing function to provide a PyObject *str scratch
   variable, a PyObject *list, and an `onError' label, which is jumped
   to if creating the string or appending it fails.

   Wrapped in do { } while (0) so that the macro expands to a single
   statement and is safe under an unbraced if/else; invoke it with a
   trailing semicolon. */
#define SPLIT_APPEND(data, left, right)				\
	do {							\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)				\
			goto onError;				\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		Py_DECREF(str);					\
	} while (0)
1292
/* Insert the substring data[left:right] at the front of `list' as a new
   string.

   Expects the enclosing function to provide a PyObject *str scratch
   variable, a PyObject *list, and an `onError' label, which is jumped
   to if creating the string or inserting it fails.

   Wrapped in do { } while (0) so that the macro expands to a single
   statement and is safe under an unbraced if/else; invoke it with a
   trailing semicolon. */
#define SPLIT_INSERT(data, left, right)				\
	do {							\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)				\
			goto onError;				\
		if (PyList_Insert(list, 0, str)) {		\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		Py_DECREF(str);					\
	} while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304
1305static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001306split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001308 int i, j;
1309 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 PyObject *list = PyList_New(0);
1311
1312 if (list == NULL)
1313 return NULL;
1314
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 for (i = j = 0; i < len; ) {
1316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 while (i < len && !isspace(Py_CHARMASK(s[i])))
1320 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 if (maxsplit-- <= 0)
1323 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001325 while (i < len && isspace(Py_CHARMASK(s[i])))
1326 i++;
1327 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328 }
1329 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001331 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001334 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 Py_DECREF(list);
1336 return NULL;
1337}
1338
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001339static PyObject *
1340split_char(const char *s, int len, char ch, int maxcount)
1341{
1342 register int i, j;
1343 PyObject *str;
1344 PyObject *list = PyList_New(0);
1345
1346 if (list == NULL)
1347 return NULL;
1348
1349 for (i = j = 0; i < len; ) {
1350 if (s[i] == ch) {
1351 if (maxcount-- <= 0)
1352 break;
1353 SPLIT_APPEND(s, j, i);
1354 i = j = i + 1;
1355 } else
1356 i++;
1357 }
1358 if (j <= len) {
1359 SPLIT_APPEND(s, j, len);
1360 }
1361 return list;
1362
1363 onError:
1364 Py_DECREF(list);
1365 return NULL;
1366}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001368PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369"S.split([sep [,maxsplit]]) -> list of strings\n\
1370\n\
1371Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001373splits are done. If sep is not specified or is None, any\n\
1374whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375
1376static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001377string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378{
1379 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001380 int maxsplit = -1;
1381 const char *s = PyString_AS_STRING(self), *sub;
1382 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383
Guido van Rossum4c08d552000-03-10 22:55:18 +00001384 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386 if (maxsplit < 0)
1387 maxsplit = INT_MAX;
1388 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 if (PyString_Check(subobj)) {
1391 sub = PyString_AS_STRING(subobj);
1392 n = PyString_GET_SIZE(subobj);
1393 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001394#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001395 else if (PyUnicode_Check(subobj))
1396 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1399 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001400
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 if (n == 0) {
1402 PyErr_SetString(PyExc_ValueError, "empty separator");
1403 return NULL;
1404 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001405 else if (n == 1)
1406 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407
1408 list = PyList_New(0);
1409 if (list == NULL)
1410 return NULL;
1411
1412 i = j = 0;
1413 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001414 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001415 if (maxsplit-- <= 0)
1416 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1418 if (item == NULL)
1419 goto fail;
1420 err = PyList_Append(list, item);
1421 Py_DECREF(item);
1422 if (err < 0)
1423 goto fail;
1424 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 }
1426 else
1427 i++;
1428 }
1429 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1430 if (item == NULL)
1431 goto fail;
1432 err = PyList_Append(list, item);
1433 Py_DECREF(item);
1434 if (err < 0)
1435 goto fail;
1436
1437 return list;
1438
1439 fail:
1440 Py_DECREF(list);
1441 return NULL;
1442}
1443
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001444static PyObject *
1445rsplit_whitespace(const char *s, int len, int maxsplit)
1446{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447 int i, j;
1448 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001449 PyObject *list = PyList_New(0);
1450
1451 if (list == NULL)
1452 return NULL;
1453
1454 for (i = j = len - 1; i >= 0; ) {
1455 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1456 i--;
1457 j = i;
1458 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1459 i--;
1460 if (j > i) {
1461 if (maxsplit-- <= 0)
1462 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001464 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1465 i--;
1466 j = i;
1467 }
1468 }
1469 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001470 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001471 }
1472 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001473 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001474 Py_DECREF(list);
1475 return NULL;
1476}
1477
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001478static PyObject *
1479rsplit_char(const char *s, int len, char ch, int maxcount)
1480{
1481 register int i, j;
1482 PyObject *str;
1483 PyObject *list = PyList_New(0);
1484
1485 if (list == NULL)
1486 return NULL;
1487
1488 for (i = j = len - 1; i >= 0; ) {
1489 if (s[i] == ch) {
1490 if (maxcount-- <= 0)
1491 break;
1492 SPLIT_INSERT(s, i + 1, j + 1);
1493 j = i = i - 1;
1494 } else
1495 i--;
1496 }
1497 if (j >= -1) {
1498 SPLIT_INSERT(s, 0, j + 1);
1499 }
1500 return list;
1501
1502 onError:
1503 Py_DECREF(list);
1504 return NULL;
1505}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001506
1507PyDoc_STRVAR(rsplit__doc__,
1508"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1509\n\
1510Return a list of the words in the string S, using sep as the\n\
1511delimiter string, starting at the end of the string and working\n\
1512to the front. If maxsplit is given, at most maxsplit splits are\n\
1513done. If sep is not specified or is None, any whitespace string\n\
1514is a separator.");
1515
1516static PyObject *
1517string_rsplit(PyStringObject *self, PyObject *args)
1518{
1519 int len = PyString_GET_SIZE(self), n, i, j, err;
1520 int maxsplit = -1;
1521 const char *s = PyString_AS_STRING(self), *sub;
1522 PyObject *list, *item, *subobj = Py_None;
1523
1524 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1525 return NULL;
1526 if (maxsplit < 0)
1527 maxsplit = INT_MAX;
1528 if (subobj == Py_None)
1529 return rsplit_whitespace(s, len, maxsplit);
1530 if (PyString_Check(subobj)) {
1531 sub = PyString_AS_STRING(subobj);
1532 n = PyString_GET_SIZE(subobj);
1533 }
1534#ifdef Py_USING_UNICODE
1535 else if (PyUnicode_Check(subobj))
1536 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1537#endif
1538 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1539 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001540
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001541 if (n == 0) {
1542 PyErr_SetString(PyExc_ValueError, "empty separator");
1543 return NULL;
1544 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001545 else if (n == 1)
1546 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001547
1548 list = PyList_New(0);
1549 if (list == NULL)
1550 return NULL;
1551
1552 j = len;
1553 i = j - n;
1554 while (i >= 0) {
1555 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1556 if (maxsplit-- <= 0)
1557 break;
1558 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1559 if (item == NULL)
1560 goto fail;
1561 err = PyList_Insert(list, 0, item);
1562 Py_DECREF(item);
1563 if (err < 0)
1564 goto fail;
1565 j = i;
1566 i -= n;
1567 }
1568 else
1569 i--;
1570 }
1571 item = PyString_FromStringAndSize(s, j);
1572 if (item == NULL)
1573 goto fail;
1574 err = PyList_Insert(list, 0, item);
1575 Py_DECREF(item);
1576 if (err < 0)
1577 goto fail;
1578
1579 return list;
1580
1581 fail:
1582 Py_DECREF(list);
1583 return NULL;
1584}
1585
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001587PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588"S.join(sequence) -> string\n\
1589\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001590Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592
1593static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001594string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595{
1596 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001597 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 char *p;
1600 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001601 size_t sz = 0;
1602 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001603 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
Tim Peters19fe14e2001-01-19 03:03:47 +00001605 seq = PySequence_Fast(orig, "");
1606 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001607 if (PyErr_ExceptionMatches(PyExc_TypeError))
1608 PyErr_Format(PyExc_TypeError,
1609 "sequence expected, %.80s found",
1610 orig->ob_type->tp_name);
1611 return NULL;
1612 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001613
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001614 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001615 if (seqlen == 0) {
1616 Py_DECREF(seq);
1617 return PyString_FromString("");
1618 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001620 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001621 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1622 PyErr_Format(PyExc_TypeError,
1623 "sequence item 0: expected string,"
1624 " %.80s found",
1625 item->ob_type->tp_name);
1626 Py_DECREF(seq);
1627 return NULL;
1628 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001629 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001630 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001633
Tim Peters19fe14e2001-01-19 03:03:47 +00001634 /* There are at least two things to join. Do a pre-pass to figure out
1635 * the total amount of space we'll need (sz), see whether any argument
1636 * is absurd, and defer to the Unicode join if appropriate.
1637 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001640 item = PySequence_Fast_GET_ITEM(seq, i);
1641 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001642#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001643 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001644 /* Defer to Unicode join.
1645 * CAUTION: There's no gurantee that the
1646 * original sequence can be iterated over
1647 * again, so we must pass seq here.
1648 */
1649 PyObject *result;
1650 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001651 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001652 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001653 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001654#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001656 "sequence item %i: expected string,"
1657 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001658 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001659 Py_DECREF(seq);
1660 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001661 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 sz += PyString_GET_SIZE(item);
1663 if (i != 0)
1664 sz += seplen;
1665 if (sz < old_sz || sz > INT_MAX) {
1666 PyErr_SetString(PyExc_OverflowError,
1667 "join() is too long for a Python string");
1668 Py_DECREF(seq);
1669 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001671 }
1672
1673 /* Allocate result space. */
1674 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1675 if (res == NULL) {
1676 Py_DECREF(seq);
1677 return NULL;
1678 }
1679
1680 /* Catenate everything. */
1681 p = PyString_AS_STRING(res);
1682 for (i = 0; i < seqlen; ++i) {
1683 size_t n;
1684 item = PySequence_Fast_GET_ITEM(seq, i);
1685 n = PyString_GET_SIZE(item);
1686 memcpy(p, PyString_AS_STRING(item), n);
1687 p += n;
1688 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001689 memcpy(p, sep, seplen);
1690 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001691 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001693
Jeremy Hylton49048292000-07-11 03:28:17 +00001694 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696}
1697
Tim Peters52e155e2001-06-16 05:42:57 +00001698PyObject *
1699_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001700{
Tim Petersa7259592001-06-16 05:11:17 +00001701 assert(sep != NULL && PyString_Check(sep));
1702 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001703 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001704}
1705
/* Normalize a (start, end) index pair in place against a string of
 * length len: negative values count from the end and are clamped to 0,
 * and end is clamped to len.  start is not clamped to len.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int bound;

    /* Upper bound first. */
    bound = *end;
    if (bound > len) {
        bound = len;
    }
    else if (bound < 0) {
        bound += len;
    }
    if (bound < 0) {
        bound = 0;
    }
    *end = bound;

    /* Then the lower bound. */
    bound = *start;
    if (bound < 0) {
        bound += len;
        if (bound < 0) {
            bound = 0;
        }
    }
    *start = bound;
}
1720
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721static long
Fred Drakeba096332000-07-09 07:04:36 +00001722string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001724 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725 int len = PyString_GET_SIZE(self);
1726 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001727 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001729 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001730 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731 return -2;
1732 if (PyString_Check(subobj)) {
1733 sub = PyString_AS_STRING(subobj);
1734 n = PyString_GET_SIZE(subobj);
1735 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001736#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001738 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001739#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001740 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 return -2;
1742
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001743 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 if (dir > 0) {
1746 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 last -= n;
1749 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001750 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001751 return (long)i;
1752 }
1753 else {
1754 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001755
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 if (n == 0 && i <= last)
1757 return (long)last;
1758 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001759 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 return (long)j;
1761 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001762
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 return -1;
1764}
1765
1766
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001767PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768"S.find(sub [,start [,end]]) -> int\n\
1769\n\
1770Return the lowest index in S where substring sub is found,\n\
1771such that sub is contained within s[start,end]. Optional\n\
1772arguments start and end are interpreted as in slice notation.\n\
1773\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001774Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775
1776static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001777string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001779 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780 if (result == -2)
1781 return NULL;
1782 return PyInt_FromLong(result);
1783}
1784
1785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787"S.index(sub [,start [,end]]) -> int\n\
1788\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001789Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790
1791static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001792string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001794 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 if (result == -2)
1796 return NULL;
1797 if (result == -1) {
1798 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001799 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800 return NULL;
1801 }
1802 return PyInt_FromLong(result);
1803}
1804
1805
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001806PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807"S.rfind(sub [,start [,end]]) -> int\n\
1808\n\
1809Return the highest index in S where substring sub is found,\n\
1810such that sub is contained within s[start,end]. Optional\n\
1811arguments start and end are interpreted as in slice notation.\n\
1812\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001813Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814
1815static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001816string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001818 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 if (result == -2)
1820 return NULL;
1821 return PyInt_FromLong(result);
1822}
1823
1824
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001825PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826"S.rindex(sub [,start [,end]]) -> int\n\
1827\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001828Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829
1830static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001831string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001833 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834 if (result == -2)
1835 return NULL;
1836 if (result == -1) {
1837 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001838 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 return NULL;
1840 }
1841 return PyInt_FromLong(result);
1842}
1843
1844
1845static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001846do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1847{
1848 char *s = PyString_AS_STRING(self);
1849 int len = PyString_GET_SIZE(self);
1850 char *sep = PyString_AS_STRING(sepobj);
1851 int seplen = PyString_GET_SIZE(sepobj);
1852 int i, j;
1853
1854 i = 0;
1855 if (striptype != RIGHTSTRIP) {
1856 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1857 i++;
1858 }
1859 }
1860
1861 j = len;
1862 if (striptype != LEFTSTRIP) {
1863 do {
1864 j--;
1865 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1866 j++;
1867 }
1868
1869 if (i == 0 && j == len && PyString_CheckExact(self)) {
1870 Py_INCREF(self);
1871 return (PyObject*)self;
1872 }
1873 else
1874 return PyString_FromStringAndSize(s+i, j-i);
1875}
1876
1877
1878static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001879do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880{
1881 char *s = PyString_AS_STRING(self);
1882 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884 i = 0;
1885 if (striptype != RIGHTSTRIP) {
1886 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1887 i++;
1888 }
1889 }
1890
1891 j = len;
1892 if (striptype != LEFTSTRIP) {
1893 do {
1894 j--;
1895 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1896 j++;
1897 }
1898
Tim Peters8fa5dd02001-09-12 02:18:30 +00001899 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 Py_INCREF(self);
1901 return (PyObject*)self;
1902 }
1903 else
1904 return PyString_FromStringAndSize(s+i, j-i);
1905}
1906
1907
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001908static PyObject *
1909do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1910{
1911 PyObject *sep = NULL;
1912
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001913 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001914 return NULL;
1915
1916 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001917 if (PyString_Check(sep))
1918 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001919#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001920 else if (PyUnicode_Check(sep)) {
1921 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1922 PyObject *res;
1923 if (uniself==NULL)
1924 return NULL;
1925 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1926 striptype, sep);
1927 Py_DECREF(uniself);
1928 return res;
1929 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001930#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001931 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001932 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001933#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001934 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001935#else
1936 "%s arg must be None or str",
1937#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001938 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001939 return NULL;
1940 }
1941 return do_xstrip(self, striptype, sep);
1942 }
1943
1944 return do_strip(self, striptype);
1945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001949"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950\n\
1951Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001952whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001953If chars is given and not None, remove characters in chars instead.\n\
1954If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955
1956static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001957string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001959 if (PyTuple_GET_SIZE(args) == 0)
1960 return do_strip(self, BOTHSTRIP); /* Common case */
1961 else
1962 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963}
1964
1965
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001966PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001967"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001970If chars is given and not None, remove characters in chars instead.\n\
1971If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972
1973static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001974string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976 if (PyTuple_GET_SIZE(args) == 0)
1977 return do_strip(self, LEFTSTRIP); /* Common case */
1978 else
1979 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980}
1981
1982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001983PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001984"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001987If chars is given and not None, remove characters in chars instead.\n\
1988If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989
1990static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001993 if (PyTuple_GET_SIZE(args) == 0)
1994 return do_strip(self, RIGHTSTRIP); /* Common case */
1995 else
1996 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997}
1998
1999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002000PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001"S.lower() -> string\n\
2002\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
2005static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002006string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007{
2008 char *s = PyString_AS_STRING(self), *s_new;
2009 int i, n = PyString_GET_SIZE(self);
2010 PyObject *new;
2011
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 new = PyString_FromStringAndSize(NULL, n);
2013 if (new == NULL)
2014 return NULL;
2015 s_new = PyString_AsString(new);
2016 for (i = 0; i < n; i++) {
2017 int c = Py_CHARMASK(*s++);
2018 if (isupper(c)) {
2019 *s_new = tolower(c);
2020 } else
2021 *s_new = c;
2022 s_new++;
2023 }
2024 return new;
2025}
2026
2027
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002028PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029"S.upper() -> string\n\
2030\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002031Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032
2033static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002034string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035{
2036 char *s = PyString_AS_STRING(self), *s_new;
2037 int i, n = PyString_GET_SIZE(self);
2038 PyObject *new;
2039
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 new = PyString_FromStringAndSize(NULL, n);
2041 if (new == NULL)
2042 return NULL;
2043 s_new = PyString_AsString(new);
2044 for (i = 0; i < n; i++) {
2045 int c = Py_CHARMASK(*s++);
2046 if (islower(c)) {
2047 *s_new = toupper(c);
2048 } else
2049 *s_new = c;
2050 s_new++;
2051 }
2052 return new;
2053}
2054
2055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057"S.title() -> string\n\
2058\n\
2059Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061
2062static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002063string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002064{
2065 char *s = PyString_AS_STRING(self), *s_new;
2066 int i, n = PyString_GET_SIZE(self);
2067 int previous_is_cased = 0;
2068 PyObject *new;
2069
Guido van Rossum4c08d552000-03-10 22:55:18 +00002070 new = PyString_FromStringAndSize(NULL, n);
2071 if (new == NULL)
2072 return NULL;
2073 s_new = PyString_AsString(new);
2074 for (i = 0; i < n; i++) {
2075 int c = Py_CHARMASK(*s++);
2076 if (islower(c)) {
2077 if (!previous_is_cased)
2078 c = toupper(c);
2079 previous_is_cased = 1;
2080 } else if (isupper(c)) {
2081 if (previous_is_cased)
2082 c = tolower(c);
2083 previous_is_cased = 1;
2084 } else
2085 previous_is_cased = 0;
2086 *s_new++ = c;
2087 }
2088 return new;
2089}
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092"S.capitalize() -> string\n\
2093\n\
2094Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002095capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
2097static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002098string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099{
2100 char *s = PyString_AS_STRING(self), *s_new;
2101 int i, n = PyString_GET_SIZE(self);
2102 PyObject *new;
2103
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104 new = PyString_FromStringAndSize(NULL, n);
2105 if (new == NULL)
2106 return NULL;
2107 s_new = PyString_AsString(new);
2108 if (0 < n) {
2109 int c = Py_CHARMASK(*s++);
2110 if (islower(c))
2111 *s_new = toupper(c);
2112 else
2113 *s_new = c;
2114 s_new++;
2115 }
2116 for (i = 1; i < n; i++) {
2117 int c = Py_CHARMASK(*s++);
2118 if (isupper(c))
2119 *s_new = tolower(c);
2120 else
2121 *s_new = c;
2122 s_new++;
2123 }
2124 return new;
2125}
2126
2127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129"S.count(sub[, start[, end]]) -> int\n\
2130\n\
2131Return the number of occurrences of substring sub in string\n\
2132S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002133interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134
2135static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002136string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 int len = PyString_GET_SIZE(self), n;
2140 int i = 0, last = INT_MAX;
2141 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143
Guido van Rossumc6821402000-05-08 14:08:05 +00002144 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2145 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002147
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148 if (PyString_Check(subobj)) {
2149 sub = PyString_AS_STRING(subobj);
2150 n = PyString_GET_SIZE(subobj);
2151 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002152#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002153 else if (PyUnicode_Check(subobj)) {
2154 int count;
2155 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2156 if (count == -1)
2157 return NULL;
2158 else
2159 return PyInt_FromLong((long) count);
2160 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002161#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002162 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2163 return NULL;
2164
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002165 string_adjust_indices(&i, &last, len);
2166
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 m = last + 1 - n;
2168 if (n == 0)
2169 return PyInt_FromLong((long) (m-i));
2170
2171 r = 0;
2172 while (i < m) {
2173 if (!memcmp(s+i, sub, n)) {
2174 r++;
2175 i += n;
2176 } else {
2177 i++;
2178 }
2179 }
2180 return PyInt_FromLong((long) r);
2181}
2182
2183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002184PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185"S.swapcase() -> string\n\
2186\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002188converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189
2190static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002191string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192{
2193 char *s = PyString_AS_STRING(self), *s_new;
2194 int i, n = PyString_GET_SIZE(self);
2195 PyObject *new;
2196
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 new = PyString_FromStringAndSize(NULL, n);
2198 if (new == NULL)
2199 return NULL;
2200 s_new = PyString_AsString(new);
2201 for (i = 0; i < n; i++) {
2202 int c = Py_CHARMASK(*s++);
2203 if (islower(c)) {
2204 *s_new = toupper(c);
2205 }
2206 else if (isupper(c)) {
2207 *s_new = tolower(c);
2208 }
2209 else
2210 *s_new = c;
2211 s_new++;
2212 }
2213 return new;
2214}
2215
2216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218"S.translate(table [,deletechars]) -> string\n\
2219\n\
2220Return a copy of the string S, where all characters occurring\n\
2221in the optional argument deletechars are removed, and the\n\
2222remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002223translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224
2225static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002226string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 register char *input, *output;
2229 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 register int i, c, changed = 0;
2231 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 int inlen, tablen, dellen = 0;
2234 PyObject *result;
2235 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002238 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241
2242 if (PyString_Check(tableobj)) {
2243 table1 = PyString_AS_STRING(tableobj);
2244 tablen = PyString_GET_SIZE(tableobj);
2245 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002246#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002248 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 parameter; instead a mapping to None will cause characters
2250 to be deleted. */
2251 if (delobj != NULL) {
2252 PyErr_SetString(PyExc_TypeError,
2253 "deletions are implemented differently for unicode");
2254 return NULL;
2255 }
2256 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2257 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002258#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261
Martin v. Löwis00b61272002-12-12 20:03:19 +00002262 if (tablen != 256) {
2263 PyErr_SetString(PyExc_ValueError,
2264 "translation table must be 256 characters long");
2265 return NULL;
2266 }
2267
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268 if (delobj != NULL) {
2269 if (PyString_Check(delobj)) {
2270 del_table = PyString_AS_STRING(delobj);
2271 dellen = PyString_GET_SIZE(delobj);
2272 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002273#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 else if (PyUnicode_Check(delobj)) {
2275 PyErr_SetString(PyExc_TypeError,
2276 "deletions are implemented differently for unicode");
2277 return NULL;
2278 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002279#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002280 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2281 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282 }
2283 else {
2284 del_table = NULL;
2285 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286 }
2287
2288 table = table1;
2289 inlen = PyString_Size(input_obj);
2290 result = PyString_FromStringAndSize((char *)NULL, inlen);
2291 if (result == NULL)
2292 return NULL;
2293 output_start = output = PyString_AsString(result);
2294 input = PyString_AsString(input_obj);
2295
2296 if (dellen == 0) {
2297 /* If no deletions are required, use faster code */
2298 for (i = inlen; --i >= 0; ) {
2299 c = Py_CHARMASK(*input++);
2300 if (Py_CHARMASK((*output++ = table[c])) != c)
2301 changed = 1;
2302 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002303 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304 return result;
2305 Py_DECREF(result);
2306 Py_INCREF(input_obj);
2307 return input_obj;
2308 }
2309
2310 for (i = 0; i < 256; i++)
2311 trans_table[i] = Py_CHARMASK(table[i]);
2312
2313 for (i = 0; i < dellen; i++)
2314 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2315
2316 for (i = inlen; --i >= 0; ) {
2317 c = Py_CHARMASK(*input++);
2318 if (trans_table[c] != -1)
2319 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2320 continue;
2321 changed = 1;
2322 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002323 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324 Py_DECREF(result);
2325 Py_INCREF(input_obj);
2326 return input_obj;
2327 }
2328 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002329 if (inlen > 0)
2330 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331 return result;
2332}
2333
2334
2335/* What follows is used for implementing replace(). Perry Stoll. */
2336
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none; in particular -1 is returned when PAT_LEN is
  greater than LEN.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* pattern can not start in the last pat_len-1 chars */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] &&
            memcmp(mem + pos, pat, pat_len) == 0) {
            return pos;
        }
        pos++;
    }
    return -1;
}
2362
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right with mymemfind() and resuming right after
  each match:
     mem=1111  and pat=11 gives 2;
     mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int nfound;

    for (nfound = 0; len >= 0; nfound++) {
        int consumed;
        const int offset = mymemfind(mem, len, pat, pat_len);

        if (offset == -1) {
            break;
        }
        /* Skip everything up to and including this match. */
        consumed = offset + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return nfound;
}
2386
2387/*
2388 mymemreplace
2389
Thomas Wouters7e474022000-07-16 12:04:32 +00002390 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391 replaced with SUB.
2392
   If length of PAT is greater than length of STR or there are no occurrences
   of PAT in STR, then the original string is returned. Otherwise, a new
   string is allocated here and returned.
2396
2397 on return, out_len is:
2398 the length of output string, or
2399 -1 if the input string is returned, or
2400 unchanged if an error occurs (no memory).
2401
2402 return value is:
2403 the new string allocated locally, or
2404 NULL if an error occurred.
2405*/
2406static char *
Tim Petersc2e7da92000-07-09 08:02:21 +00002407mymemreplace(const char *str, int len, /* input string */
2408 const char *pat, int pat_len, /* pattern string to find */
2409 const char *sub, int sub_len, /* substitution string */
2410 int count, /* number of replacements */
Tim Peters4cd44ef2001-05-10 00:05:33 +00002411 int *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412{
2413 char *out_s;
2414 char *new_s;
2415 int nfound, offset, new_len;
2416
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002417 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 goto return_same;
2419
2420 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002421 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002422 if (count < 0)
2423 count = INT_MAX;
2424 else if (nfound > count)
2425 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 if (nfound == 0)
2427 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002428
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002430 if (new_len == 0) {
2431 /* Have to allocate something for the caller to free(). */
2432 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002433 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002434 return NULL;
2435 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002437 else {
2438 assert(new_len > 0);
2439 new_s = (char *)PyMem_MALLOC(new_len);
2440 if (new_s == NULL)
2441 return NULL;
2442 out_s = new_s;
2443
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002444 if (pat_len > 0) {
2445 for (; nfound > 0; --nfound) {
2446 /* find index of next instance of pattern */
2447 offset = mymemfind(str, len, pat, pat_len);
2448 if (offset == -1)
2449 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002450
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002451 /* copy non matching part of input string */
2452 memcpy(new_s, str, offset);
2453 str += offset + pat_len;
2454 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002455
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002456 /* copy substitute into the output string */
2457 new_s += offset;
2458 memcpy(new_s, sub, sub_len);
2459 new_s += sub_len;
2460 }
2461 /* copy any remaining values into output string */
2462 if (len > 0)
2463 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002464 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002465 else {
2466 for (;;++str, --len) {
2467 memcpy(new_s, sub, sub_len);
2468 new_s += sub_len;
2469 if (--nfound <= 0) {
2470 memcpy(new_s, str, len);
2471 break;
2472 }
2473 *new_s++ = *str;
2474 }
2475 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002476 }
2477 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478 return out_s;
2479
2480 return_same:
2481 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002482 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483}
2484
2485
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002486PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002487"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488\n\
2489Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002490old replaced by new. If the optional argument count is\n\
2491given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492
2493static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002494string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002495{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496 const char *str = PyString_AS_STRING(self), *sub, *repl;
2497 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002498 const int len = PyString_GET_SIZE(self);
2499 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002500 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002502 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002503
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504 if (!PyArg_ParseTuple(args, "OO|i:replace",
2505 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002507
2508 if (PyString_Check(subobj)) {
2509 sub = PyString_AS_STRING(subobj);
2510 sub_len = PyString_GET_SIZE(subobj);
2511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002512#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002514 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002516#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2518 return NULL;
2519
2520 if (PyString_Check(replobj)) {
2521 repl = PyString_AS_STRING(replobj);
2522 repl_len = PyString_GET_SIZE(replobj);
2523 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002524#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002526 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002528#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2530 return NULL;
2531
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002533 if (new_s == NULL) {
2534 PyErr_NoMemory();
2535 return NULL;
2536 }
2537 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002538 if (PyString_CheckExact(self)) {
2539 /* we're returning another reference to self */
2540 new = (PyObject*)self;
2541 Py_INCREF(new);
2542 }
2543 else {
2544 new = PyString_FromStringAndSize(str, len);
2545 if (new == NULL)
2546 return NULL;
2547 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002548 }
2549 else {
2550 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002551 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552 }
2553 return new;
2554}
2555
2556
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002557PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002558"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002559\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002560Return True if S starts with the specified prefix, False otherwise.\n\
2561With optional start, test S beginning at that position.\n\
2562With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563
2564static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002565string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002568 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570 int plen;
2571 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002572 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002574
Guido van Rossumc6821402000-05-08 14:08:05 +00002575 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2576 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002577 return NULL;
2578 if (PyString_Check(subobj)) {
2579 prefix = PyString_AS_STRING(subobj);
2580 plen = PyString_GET_SIZE(subobj);
2581 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002582#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002583 else if (PyUnicode_Check(subobj)) {
2584 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002585 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002586 subobj, start, end, -1);
2587 if (rc == -1)
2588 return NULL;
2589 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002590 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002591 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002592#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002594 return NULL;
2595
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002596 string_adjust_indices(&start, &end, len);
2597
2598 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002599 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002600
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002601 if (end-start >= plen)
2602 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2603 else
2604 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605}
2606
2607
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002608PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002609"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002611Return True if S ends with the specified suffix, False otherwise.\n\
2612With optional start, test S beginning at that position.\n\
2613With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614
2615static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002616string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002618 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 const char* suffix;
2621 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002622 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002623 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002625
Guido van Rossumc6821402000-05-08 14:08:05 +00002626 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2627 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 return NULL;
2629 if (PyString_Check(subobj)) {
2630 suffix = PyString_AS_STRING(subobj);
2631 slen = PyString_GET_SIZE(subobj);
2632 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002633#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002634 else if (PyUnicode_Check(subobj)) {
2635 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002636 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002637 subobj, start, end, +1);
2638 if (rc == -1)
2639 return NULL;
2640 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002641 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002642 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002643#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002644 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002645 return NULL;
2646
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002647 string_adjust_indices(&start, &end, len);
2648
2649 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002650 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002652 if (end-slen > start)
2653 start = end - slen;
2654 if (end-start >= slen)
2655 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2656 else
2657 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658}
2659
2660
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002661PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002662"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002663\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002664Encodes S using the codec registered for encoding. encoding defaults\n\
2665to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002666handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002667a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2668'xmlcharrefreplace' as well as any other name registered with\n\
2669codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002670
2671static PyObject *
2672string_encode(PyStringObject *self, PyObject *args)
2673{
2674 char *encoding = NULL;
2675 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002676 PyObject *v;
2677
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002678 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2679 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002680 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2681 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2682 PyErr_Format(PyExc_TypeError,
2683 "encoder did not return a string/unicode object "
2684 "(type=%.400s)",
2685 v->ob_type->tp_name);
2686 Py_DECREF(v);
2687 return NULL;
2688 }
2689 return v;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002690}
2691
2692
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002693PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002694"S.decode([encoding[,errors]]) -> object\n\
2695\n\
2696Decodes S using the codec registered for encoding. encoding defaults\n\
2697to the default encoding. errors may be given to set a different error\n\
2698handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002699a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2700as well as any other name registerd with codecs.register_error that is\n\
2701able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002702
2703static PyObject *
2704string_decode(PyStringObject *self, PyObject *args)
2705{
2706 char *encoding = NULL;
2707 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002708 PyObject *v;
2709
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002710 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2711 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002712 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
2713 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2714 PyErr_Format(PyExc_TypeError,
2715 "decoder did not return a string/unicode object "
2716 "(type=%.400s)",
2717 v->ob_type->tp_name);
2718 Py_DECREF(v);
2719 return NULL;
2720 }
2721 return v;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002722}
2723
2724
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002725PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002726"S.expandtabs([tabsize]) -> string\n\
2727\n\
2728Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002729If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002730
2731static PyObject*
2732string_expandtabs(PyStringObject *self, PyObject *args)
2733{
2734 const char *e, *p;
2735 char *q;
2736 int i, j;
2737 PyObject *u;
2738 int tabsize = 8;
2739
2740 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2741 return NULL;
2742
Thomas Wouters7e474022000-07-16 12:04:32 +00002743 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002744 i = j = 0;
2745 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2746 for (p = PyString_AS_STRING(self); p < e; p++)
2747 if (*p == '\t') {
2748 if (tabsize > 0)
2749 j += tabsize - (j % tabsize);
2750 }
2751 else {
2752 j++;
2753 if (*p == '\n' || *p == '\r') {
2754 i += j;
2755 j = 0;
2756 }
2757 }
2758
2759 /* Second pass: create output string and fill it */
2760 u = PyString_FromStringAndSize(NULL, i + j);
2761 if (!u)
2762 return NULL;
2763
2764 j = 0;
2765 q = PyString_AS_STRING(u);
2766
2767 for (p = PyString_AS_STRING(self); p < e; p++)
2768 if (*p == '\t') {
2769 if (tabsize > 0) {
2770 i = tabsize - (j % tabsize);
2771 j += i;
2772 while (i--)
2773 *q++ = ' ';
2774 }
2775 }
2776 else {
2777 j++;
2778 *q++ = *p;
2779 if (*p == '\n' || *p == '\r')
2780 j = 0;
2781 }
2782
2783 return u;
2784}
2785
Tim Peters8fa5dd02001-09-12 02:18:30 +00002786static PyObject *
2787pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002788{
2789 PyObject *u;
2790
2791 if (left < 0)
2792 left = 0;
2793 if (right < 0)
2794 right = 0;
2795
Tim Peters8fa5dd02001-09-12 02:18:30 +00002796 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002797 Py_INCREF(self);
2798 return (PyObject *)self;
2799 }
2800
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002801 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002802 left + PyString_GET_SIZE(self) + right);
2803 if (u) {
2804 if (left)
2805 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002806 memcpy(PyString_AS_STRING(u) + left,
2807 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002808 PyString_GET_SIZE(self));
2809 if (right)
2810 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2811 fill, right);
2812 }
2813
2814 return u;
2815}
2816
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002817PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002818"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002819"\n"
2820"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002821"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002822
2823static PyObject *
2824string_ljust(PyStringObject *self, PyObject *args)
2825{
2826 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002827 char fillchar = ' ';
2828
2829 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002830 return NULL;
2831
Tim Peters8fa5dd02001-09-12 02:18:30 +00002832 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002833 Py_INCREF(self);
2834 return (PyObject*) self;
2835 }
2836
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002837 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002838}
2839
2840
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002841PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002842"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002843"\n"
2844"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002845"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002846
2847static PyObject *
2848string_rjust(PyStringObject *self, PyObject *args)
2849{
2850 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002851 char fillchar = ' ';
2852
2853 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002854 return NULL;
2855
Tim Peters8fa5dd02001-09-12 02:18:30 +00002856 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002857 Py_INCREF(self);
2858 return (PyObject*) self;
2859 }
2860
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002861 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002862}
2863
2864
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002865PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002866"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002867"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002868"Return S centered in a string of length width. Padding is\n"
2869"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002870
2871static PyObject *
2872string_center(PyStringObject *self, PyObject *args)
2873{
2874 int marg, left;
2875 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002876 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002878 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002879 return NULL;
2880
Tim Peters8fa5dd02001-09-12 02:18:30 +00002881 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882 Py_INCREF(self);
2883 return (PyObject*) self;
2884 }
2885
2886 marg = width - PyString_GET_SIZE(self);
2887 left = marg / 2 + (marg & width & 1);
2888
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002889 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890}
2891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002892PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002893"S.zfill(width) -> string\n"
2894"\n"
2895"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002896"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002897
2898static PyObject *
2899string_zfill(PyStringObject *self, PyObject *args)
2900{
2901 int fill;
2902 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002903 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002904
2905 int width;
2906 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2907 return NULL;
2908
2909 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002910 if (PyString_CheckExact(self)) {
2911 Py_INCREF(self);
2912 return (PyObject*) self;
2913 }
2914 else
2915 return PyString_FromStringAndSize(
2916 PyString_AS_STRING(self),
2917 PyString_GET_SIZE(self)
2918 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002919 }
2920
2921 fill = width - PyString_GET_SIZE(self);
2922
2923 s = pad(self, fill, 0, '0');
2924
2925 if (s == NULL)
2926 return NULL;
2927
2928 p = PyString_AS_STRING(s);
2929 if (p[fill] == '+' || p[fill] == '-') {
2930 /* move sign to beginning of string */
2931 p[0] = p[fill];
2932 p[fill] = '0';
2933 }
2934
2935 return (PyObject*) s;
2936}
2937
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002938PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002939"S.isspace() -> bool\n\
2940\n\
2941Return True if all characters in S are whitespace\n\
2942and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002943
2944static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002945string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002946{
Fred Drakeba096332000-07-09 07:04:36 +00002947 register const unsigned char *p
2948 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002949 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002950
Guido van Rossum4c08d552000-03-10 22:55:18 +00002951 /* Shortcut for single character strings */
2952 if (PyString_GET_SIZE(self) == 1 &&
2953 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002954 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002955
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002956 /* Special case for empty strings */
2957 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002958 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002959
Guido van Rossum4c08d552000-03-10 22:55:18 +00002960 e = p + PyString_GET_SIZE(self);
2961 for (; p < e; p++) {
2962 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002963 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002964 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002965 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002966}
2967
2968
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002969PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002970"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002971\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002972Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002973and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002974
2975static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002976string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002977{
Fred Drakeba096332000-07-09 07:04:36 +00002978 register const unsigned char *p
2979 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002980 register const unsigned char *e;
2981
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002982 /* Shortcut for single character strings */
2983 if (PyString_GET_SIZE(self) == 1 &&
2984 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002985 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002986
2987 /* Special case for empty strings */
2988 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002989 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002990
2991 e = p + PyString_GET_SIZE(self);
2992 for (; p < e; p++) {
2993 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002994 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002995 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002996 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002997}
2998
2999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003000PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003001"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003002\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003003Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003004and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003005
3006static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003007string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003008{
Fred Drakeba096332000-07-09 07:04:36 +00003009 register const unsigned char *p
3010 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003011 register const unsigned char *e;
3012
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003013 /* Shortcut for single character strings */
3014 if (PyString_GET_SIZE(self) == 1 &&
3015 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003016 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003017
3018 /* Special case for empty strings */
3019 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003020 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003021
3022 e = p + PyString_GET_SIZE(self);
3023 for (; p < e; p++) {
3024 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003025 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003026 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003027 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003028}
3029
3030
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003031PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003032"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003034Return True if all characters in S are digits\n\
3035and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003036
3037static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003038string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003039{
Fred Drakeba096332000-07-09 07:04:36 +00003040 register const unsigned char *p
3041 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003042 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003043
Guido van Rossum4c08d552000-03-10 22:55:18 +00003044 /* Shortcut for single character strings */
3045 if (PyString_GET_SIZE(self) == 1 &&
3046 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003047 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003048
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003049 /* Special case for empty strings */
3050 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003051 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003052
Guido van Rossum4c08d552000-03-10 22:55:18 +00003053 e = p + PyString_GET_SIZE(self);
3054 for (; p < e; p++) {
3055 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003056 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003057 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003058 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059}
3060
3061
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003062PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003063"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003064\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003065Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003066at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067
3068static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003069string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070{
Fred Drakeba096332000-07-09 07:04:36 +00003071 register const unsigned char *p
3072 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003073 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003074 int cased;
3075
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076 /* Shortcut for single character strings */
3077 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003078 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003079
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003080 /* Special case for empty strings */
3081 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003082 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003083
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084 e = p + PyString_GET_SIZE(self);
3085 cased = 0;
3086 for (; p < e; p++) {
3087 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003088 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003089 else if (!cased && islower(*p))
3090 cased = 1;
3091 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003092 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003093}
3094
3095
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003096PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003097"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003098\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003099Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003100at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101
3102static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003103string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003104{
Fred Drakeba096332000-07-09 07:04:36 +00003105 register const unsigned char *p
3106 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003107 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003108 int cased;
3109
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110 /* Shortcut for single character strings */
3111 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003112 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003113
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003114 /* Special case for empty strings */
3115 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003116 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003117
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118 e = p + PyString_GET_SIZE(self);
3119 cased = 0;
3120 for (; p < e; p++) {
3121 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003122 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123 else if (!cased && isupper(*p))
3124 cased = 1;
3125 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003126 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003127}
3128
3129
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003130PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003131"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003132\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003133Return True if S is a titlecased string and there is at least one\n\
3134character in S, i.e. uppercase characters may only follow uncased\n\
3135characters and lowercase characters only cased ones. Return False\n\
3136otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003137
3138static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003139string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140{
Fred Drakeba096332000-07-09 07:04:36 +00003141 register const unsigned char *p
3142 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003143 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003144 int cased, previous_is_cased;
3145
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146 /* Shortcut for single character strings */
3147 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003148 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003149
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003150 /* Special case for empty strings */
3151 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003152 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003153
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 e = p + PyString_GET_SIZE(self);
3155 cased = 0;
3156 previous_is_cased = 0;
3157 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003158 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159
3160 if (isupper(ch)) {
3161 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003162 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163 previous_is_cased = 1;
3164 cased = 1;
3165 }
3166 else if (islower(ch)) {
3167 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003168 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 previous_is_cased = 1;
3170 cased = 1;
3171 }
3172 else
3173 previous_is_cased = 0;
3174 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003175 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176}
3177
3178
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003179PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003180"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003181\n\
3182Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003183Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003184is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003185
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186static PyObject*
3187string_splitlines(PyStringObject *self, PyObject *args)
3188{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003189 register int i;
3190 register int j;
3191 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003192 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003193 PyObject *list;
3194 PyObject *str;
3195 char *data;
3196
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003197 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003198 return NULL;
3199
3200 data = PyString_AS_STRING(self);
3201 len = PyString_GET_SIZE(self);
3202
Guido van Rossum4c08d552000-03-10 22:55:18 +00003203 list = PyList_New(0);
3204 if (!list)
3205 goto onError;
3206
3207 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003208 int eol;
3209
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 /* Find a line and append it */
3211 while (i < len && data[i] != '\n' && data[i] != '\r')
3212 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003213
3214 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003215 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 if (i < len) {
3217 if (data[i] == '\r' && i + 1 < len &&
3218 data[i+1] == '\n')
3219 i += 2;
3220 else
3221 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003222 if (keepends)
3223 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003224 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003225 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003226 j = i;
3227 }
3228 if (j < len) {
3229 SPLIT_APPEND(data, j, len);
3230 }
3231
3232 return list;
3233
3234 onError:
3235 Py_DECREF(list);
3236 return NULL;
3237}
3238
3239#undef SPLIT_APPEND
3240
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003241static PyObject *
3242string_getnewargs(PyStringObject *v)
3243{
3244 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3245}
3246
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003247
/* Method table for the str type; referenced from PyString_Type's
 * tp_methods slot.  Each entry is {name, C function, calling-convention
 * flag, docstring}:
 *   METH_NOARGS  - the method takes no arguments besides self
 *   METH_O       - the method takes exactly one object argument
 *   METH_VARARGS - the method receives its arguments as a tuple
 * The table ends with a {NULL, NULL} sentinel entry.
 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003248static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003249string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003250 /* Counterparts of the obsolete stropmodule functions; except
3251 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003252 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3253 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003254 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003255 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3256 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003257 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3258 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3259 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3260 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3261 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3262 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3263 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003264 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3265 capitalize__doc__},
3266 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3267 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3268 endswith__doc__},
3269 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3270 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3271 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3272 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3273 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3274 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3275 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3276 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3277 startswith__doc__},
3278 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3279 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3280 swapcase__doc__},
3281 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3282 translate__doc__},
3283 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3284 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3285 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3286 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3287 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3288 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3289 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3290 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3291 expandtabs__doc__},
3292 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3293 splitlines__doc__},
/* No docstring is supplied for __getnewargs__: the fourth field is
   omitted and therefore zero-initialized. */
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003294 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003295 {NULL, NULL} /* sentinel */
3296};
3297
Jeremy Hylton938ace62002-07-17 16:30:39 +00003298static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003299str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3300
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003301static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003302string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003303{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003304 PyObject *x = NULL;
3305 static char *kwlist[] = {"object", 0};
3306
Guido van Rossumae960af2001-08-30 03:11:59 +00003307 if (type != &PyString_Type)
3308 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003309 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3310 return NULL;
3311 if (x == NULL)
3312 return PyString_FromString("");
3313 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003314}
3315
Guido van Rossumae960af2001-08-30 03:11:59 +00003316static PyObject *
3317str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3318{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003319 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003320 int n;
3321
3322 assert(PyType_IsSubtype(type, &PyString_Type));
3323 tmp = string_new(&PyString_Type, args, kwds);
3324 if (tmp == NULL)
3325 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003326 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003327 n = PyString_GET_SIZE(tmp);
3328 pnew = type->tp_alloc(type, n);
3329 if (pnew != NULL) {
3330 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003331 ((PyStringObject *)pnew)->ob_shash =
3332 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003333 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003334 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003335 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003336 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003337}
3338
Guido van Rossumcacfc072002-05-24 19:01:59 +00003339static PyObject *
3340basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3341{
3342 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003343 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003344 return NULL;
3345}
3346
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003347static PyObject *
3348string_mod(PyObject *v, PyObject *w)
3349{
3350 if (!PyString_Check(v)) {
3351 Py_INCREF(Py_NotImplemented);
3352 return Py_NotImplemented;
3353 }
3354 return PyString_Format(v, w);
3355}
3356
/* Docstring for the basestring type; used as PyBaseString_Type's tp_doc. */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003357PyDoc_STRVAR(basestring_doc,
3358"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003359
/* Number-protocol table for str; referenced from PyString_Type's
 * tp_as_number slot.  Only nb_remainder is set (to string_mod); the
 * slots listed before it are 0 and every slot after it is left out of
 * the initializer, so it is zero-initialized as well.
 */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003360static PyNumberMethods string_as_number = {
3361 0, /*nb_add*/
3362 0, /*nb_subtract*/
3363 0, /*nb_multiply*/
3364 0, /*nb_divide*/
3365 string_mod, /*nb_remainder*/
3366};
3367
3368
/* Type object for basestring.  It derives from PyBaseObject_Type, may be
 * subclassed (Py_TPFLAGS_BASETYPE), and its tp_new is basestring_new.
 * Apart from tp_flags, tp_doc, tp_base and tp_new every slot is 0.
 */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003369PyTypeObject PyBaseString_Type = {
3370 PyObject_HEAD_INIT(&PyType_Type)
3371 0, /* ob_size */
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003372 "basestring", /* tp_name */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003373 0, /* tp_basicsize */
3374 0, /* tp_itemsize */
3375 0, /* tp_dealloc */
3376 0, /* tp_print */
3377 0, /* tp_getattr */
3378 0, /* tp_setattr */
3379 0, /* tp_compare */
3380 0, /* tp_repr */
3381 0, /* tp_as_number */
3382 0, /* tp_as_sequence */
3383 0, /* tp_as_mapping */
3384 0, /* tp_hash */
3385 0, /* tp_call */
3386 0, /* tp_str */
3387 0, /* tp_getattro */
3388 0, /* tp_setattro */
3389 0, /* tp_as_buffer */
3390 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3391 basestring_doc, /* tp_doc */
3392 0, /* tp_traverse */
3393 0, /* tp_clear */
3394 0, /* tp_richcompare */
3395 0, /* tp_weaklistoffset */
3396 0, /* tp_iter */
3397 0, /* tp_iternext */
3398 0, /* tp_methods */
3399 0, /* tp_members */
3400 0, /* tp_getset */
3401 &PyBaseObject_Type, /* tp_base */
3402 0, /* tp_dict */
3403 0, /* tp_descr_get */
3404 0, /* tp_descr_set */
3405 0, /* tp_dictoffset */
3406 0, /* tp_init */
3407 0, /* tp_alloc */
3408 basestring_new, /* tp_new */
3409 0, /* tp_free */
3410};
3411
/* Docstring for the str type; used as PyString_Type's tp_doc. */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003412PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003413"str(object) -> string\n\
3414\n\
3415Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003416If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003417
/* Type object for str.  It is a variable-size type: the basic size is
 * sizeof(PyStringObject) and each item is one char.  It derives from
 * PyBaseString_Type, may be subclassed (Py_TPFLAGS_BASETYPE), exposes
 * string_methods, and supports the number (for `%`), sequence, mapping
 * and buffer protocols through the tables referenced below.  Instances
 * are created by string_new and released with PyObject_Del.
 */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003418PyTypeObject PyString_Type = {
3419 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003420 0, /* ob_size */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003421 "str", /* tp_name */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003422 sizeof(PyStringObject), /* tp_basicsize */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003423 sizeof(char), /* tp_itemsize */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003424 (destructor)string_dealloc, /* tp_dealloc */
3425 (printfunc)string_print, /* tp_print */
3426 0, /* tp_getattr */
3427 0, /* tp_setattr */
3428 0, /* tp_compare */
3429 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003430 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003431 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003432 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003433 (hashfunc)string_hash, /* tp_hash */
3434 0, /* tp_call */
3435 (reprfunc)string_str, /* tp_str */
3436 PyObject_GenericGetAttr, /* tp_getattro */
3437 0, /* tp_setattro */
3438 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003439 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3440 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003441 string_doc, /* tp_doc */
3442 0, /* tp_traverse */
3443 0, /* tp_clear */
3444 (richcmpfunc)string_richcompare, /* tp_richcompare */
3445 0, /* tp_weaklistoffset */
3446 0, /* tp_iter */
3447 0, /* tp_iternext */
3448 string_methods, /* tp_methods */
3449 0, /* tp_members */
3450 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003451 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003452 0, /* tp_dict */
3453 0, /* tp_descr_get */
3454 0, /* tp_descr_set */
3455 0, /* tp_dictoffset */
3456 0, /* tp_init */
3457 0, /* tp_alloc */
3458 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003459 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003460};
3461
3462void
Fred Drakeba096332000-07-09 07:04:36 +00003463PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003464{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003465 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003466 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003467 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003468 if (w == NULL || !PyString_Check(*pv)) {
3469 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003470 *pv = NULL;
3471 return;
3472 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003473 v = string_concat((PyStringObject *) *pv, w);
3474 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003475 *pv = v;
3476}
3477
Guido van Rossum013142a1994-08-30 08:19:36 +00003478void
Fred Drakeba096332000-07-09 07:04:36 +00003479PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003480{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003481 PyString_Concat(pv, w);
3482 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003483}
3484
3485
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003486/* The following function breaks the notion that strings are immutable:
3487 it changes the size of a string. We get away with this only if there
3488 is only one module referencing the object. You can also think of it
3489 as creating a new string object and destroying the old one, only
3490 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003491 already be known to some other part of the code...
3492 Note that if there's not enough memory to resize the string, the original
3493 string object at *pv is deallocated, *pv is set to NULL, an "out of
3494 memory" exception is set, and -1 is returned. Else (on success) 0 is
3495 returned, and the value in *pv may or may not be the same as on input.
3496 As always, an extra byte is allocated for a trailing \0 byte (newsize
3497 does *not* include that), and a trailing \0 byte is stored.
3498*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003499
3500int
Fred Drakeba096332000-07-09 07:04:36 +00003501_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003502{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003503 register PyObject *v;
3504 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003505 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003506 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003507 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003508 Py_DECREF(v);
3509 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003510 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003511 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003512 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003513 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003514 _Py_ForgetReference(v);
3515 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003516 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003517 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003518 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003519 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003520 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003521 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003522 _Py_NewReference(*pv);
3523 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003524 sv->ob_size = newsize;
3525 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003526 return 0;
3527}
Guido van Rossume5372401993-03-16 12:15:04 +00003528
3529/* Helpers for formatstring */
3530
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003531static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003532getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003533{
3534 int argidx = *p_argidx;
3535 if (argidx < arglen) {
3536 (*p_argidx)++;
3537 if (arglen < 0)
3538 return args;
3539 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003540 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003541 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003542 PyErr_SetString(PyExc_TypeError,
3543 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003544 return NULL;
3545}
3546
Tim Peters38fd5b62000-09-21 05:43:11 +00003547/* Format codes
3548 * F_LJUST '-'
3549 * F_SIGN '+'
3550 * F_BLANK ' '
3551 * F_ALT '#'
3552 * F_ZERO '0'
3553 */
/* Each flag occupies its own bit, so several can be OR-ed together in a
   single int; the format helpers test them with `flags & F_xxx`. */
Guido van Rossume5372401993-03-16 12:15:04 +00003554#define F_LJUST (1<<0)
3555#define F_SIGN (1<<1)
3556#define F_BLANK (1<<2)
3557#define F_ALT (1<<3)
3558#define F_ZERO (1<<4)
3559
3559
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003560static int
Fred Drakeba096332000-07-09 07:04:36 +00003561formatfloat(char *buf, size_t buflen, int flags,
3562 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003563{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003564 /* fmt = '%#.' + `prec` + `type`
3565 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003566 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003567 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003568 x = PyFloat_AsDouble(v);
3569 if (x == -1.0 && PyErr_Occurred()) {
3570 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003571 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003572 }
Guido van Rossume5372401993-03-16 12:15:04 +00003573 if (prec < 0)
3574 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003575 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3576 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003577 /* Worst case length calc to ensure no buffer overrun:
3578
3579 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003580 fmt = %#.<prec>g
3581 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003582 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003583 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003584
3585 'f' formats:
3586 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3587 len = 1 + 50 + 1 + prec = 52 + prec
3588
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003589 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003590 always given), therefore increase the length by one.
3591
3592 */
3593 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3594 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003595 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003596 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003597 return -1;
3598 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003599 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3600 (flags&F_ALT) ? "#" : "",
3601 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003602 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003603 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003604}
3605
Tim Peters38fd5b62000-09-21 05:43:11 +00003606/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3607 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3608 * Python's regular ints.
3609 * Return value: a new PyString*, or NULL if error.
3610 * . *pbuf is set to point into it,
3611 * *plen set to the # of chars following that.
3612 * Caller must decref it when done using pbuf.
3613 * The string starting at *pbuf is of the form
3614 * "-"? ("0x" | "0X")? digit+
3615 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003616 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003617 * There will be at least prec digits, zero-filled on the left if
3618 * necessary to get that many.
3619 * val object to be converted
3620 * flags bitmask of format flags; only F_ALT is looked at
3621 * prec minimum number of digits; 0-fill on left if needed
3622 * type a character in [duoxX]; u acts the same as d
3623 *
3624 * CAUTION: o, x and X conversions on regular ints can never
3625 * produce a '-' sign, but can for Python's unbounded ints.
3626 */
3627PyObject*
3628_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3629 char **pbuf, int *plen)
3630{
3631 PyObject *result = NULL;
3632 char *buf;
3633 int i;
3634 int sign; /* 1 if '-', else 0 */
3635 int len; /* number of characters */
3636 int numdigits; /* len == numnondigits + numdigits */
3637 int numnondigits = 0;
3638
3639 switch (type) {
3640 case 'd':
3641 case 'u':
3642 result = val->ob_type->tp_str(val);
3643 break;
3644 case 'o':
3645 result = val->ob_type->tp_as_number->nb_oct(val);
3646 break;
3647 case 'x':
3648 case 'X':
3649 numnondigits = 2;
3650 result = val->ob_type->tp_as_number->nb_hex(val);
3651 break;
3652 default:
3653 assert(!"'type' not in [duoxX]");
3654 }
3655 if (!result)
3656 return NULL;
3657
3658 /* To modify the string in-place, there can only be one reference. */
3659 if (result->ob_refcnt != 1) {
3660 PyErr_BadInternalCall();
3661 return NULL;
3662 }
3663 buf = PyString_AsString(result);
3664 len = PyString_Size(result);
3665 if (buf[len-1] == 'L') {
3666 --len;
3667 buf[len] = '\0';
3668 }
3669 sign = buf[0] == '-';
3670 numnondigits += sign;
3671 numdigits = len - numnondigits;
3672 assert(numdigits > 0);
3673
Tim Petersfff53252001-04-12 18:38:48 +00003674 /* Get rid of base marker unless F_ALT */
3675 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003676 /* Need to skip 0x, 0X or 0. */
3677 int skipped = 0;
3678 switch (type) {
3679 case 'o':
3680 assert(buf[sign] == '0');
3681 /* If 0 is only digit, leave it alone. */
3682 if (numdigits > 1) {
3683 skipped = 1;
3684 --numdigits;
3685 }
3686 break;
3687 case 'x':
3688 case 'X':
3689 assert(buf[sign] == '0');
3690 assert(buf[sign + 1] == 'x');
3691 skipped = 2;
3692 numnondigits -= 2;
3693 break;
3694 }
3695 if (skipped) {
3696 buf += skipped;
3697 len -= skipped;
3698 if (sign)
3699 buf[0] = '-';
3700 }
3701 assert(len == numnondigits + numdigits);
3702 assert(numdigits > 0);
3703 }
3704
3705 /* Fill with leading zeroes to meet minimum width. */
3706 if (prec > numdigits) {
3707 PyObject *r1 = PyString_FromStringAndSize(NULL,
3708 numnondigits + prec);
3709 char *b1;
3710 if (!r1) {
3711 Py_DECREF(result);
3712 return NULL;
3713 }
3714 b1 = PyString_AS_STRING(r1);
3715 for (i = 0; i < numnondigits; ++i)
3716 *b1++ = *buf++;
3717 for (i = 0; i < prec - numdigits; i++)
3718 *b1++ = '0';
3719 for (i = 0; i < numdigits; i++)
3720 *b1++ = *buf++;
3721 *b1 = '\0';
3722 Py_DECREF(result);
3723 result = r1;
3724 buf = PyString_AS_STRING(result);
3725 len = numnondigits + prec;
3726 }
3727
3728 /* Fix up case for hex conversions. */
3729 switch (type) {
3730 case 'x':
3731 /* Need to convert all upper case letters to lower case. */
3732 for (i = 0; i < len; i++)
3733 if (buf[i] >= 'A' && buf[i] <= 'F')
3734 buf[i] += 'a'-'A';
3735 break;
3736 case 'X':
3737 /* Need to convert 0x to 0X (and -0x to -0X). */
3738 if (buf[sign + 1] == 'x')
3739 buf[sign + 1] = 'X';
3740 break;
3741 }
3742 *pbuf = buf;
3743 *plen = len;
3744 return result;
3745}
3746
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003747static int
Fred Drakeba096332000-07-09 07:04:36 +00003748formatint(char *buf, size_t buflen, int flags,
3749 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003750{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003751 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003752 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3753 + 1 + 1 = 24 */
3754 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003755 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003756 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003757
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003758 x = PyInt_AsLong(v);
3759 if (x == -1 && PyErr_Occurred()) {
3760 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003761 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003762 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003763 if (x < 0 && type == 'u') {
3764 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003765 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003766 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3767 sign = "-";
3768 else
3769 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003770 if (prec < 0)
3771 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003772
3773 if ((flags & F_ALT) &&
3774 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003775 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003776 * of issues that cause pain:
3777 * - when 0 is being converted, the C standard leaves off
3778 * the '0x' or '0X', which is inconsistent with other
3779 * %#x/%#X conversions and inconsistent with Python's
3780 * hex() function
3781 * - there are platforms that violate the standard and
3782 * convert 0 with the '0x' or '0X'
3783 * (Metrowerks, Compaq Tru64)
3784 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003785 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003786 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003787 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003788 * We can achieve the desired consistency by inserting our
3789 * own '0x' or '0X' prefix, and substituting %x/%X in place
3790 * of %#x/%#X.
3791 *
3792 * Note that this is the same approach as used in
3793 * formatint() in unicodeobject.c
3794 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003795 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3796 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003797 }
3798 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003799 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3800 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003801 prec, type);
3802 }
3803
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003804 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3805 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003806 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003807 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003808 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003809 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003810 return -1;
3811 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003812 if (sign[0])
3813 PyOS_snprintf(buf, buflen, fmt, -x);
3814 else
3815 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003816 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003817}
3818
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003819static int
Fred Drakeba096332000-07-09 07:04:36 +00003820formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003821{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003822 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003823 if (PyString_Check(v)) {
3824 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003825 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003826 }
3827 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003828 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003829 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003830 }
3831 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003832 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003833}
3834
Guido van Rossum013142a1994-08-30 08:19:36 +00003835
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003836/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3837
3838 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3839 chars are formatted. XXX This is a magic number. Each formatting
3840 routine does bounds checking to ensure no overflow, but a better
3841 solution may be to malloc a buffer of appropriate size for each
3842 format. For now, the current solution is sufficient.
3843*/
3844#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003845
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003846PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003847PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003848{
3849 char *fmt, *res;
3850 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003851 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003852 PyObject *result, *orig_args;
3853#ifdef Py_USING_UNICODE
3854 PyObject *v, *w;
3855#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003856 PyObject *dict = NULL;
3857 if (format == NULL || !PyString_Check(format) || args == NULL) {
3858 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003859 return NULL;
3860 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003861 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003862 fmt = PyString_AS_STRING(format);
3863 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003864 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003865 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003866 if (result == NULL)
3867 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003868 res = PyString_AsString(result);
3869 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003870 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003871 argidx = 0;
3872 }
3873 else {
3874 arglen = -1;
3875 argidx = -2;
3876 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003877 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3878 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003879 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003880 while (--fmtcnt >= 0) {
3881 if (*fmt != '%') {
3882 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003883 rescnt = fmtcnt + 100;
3884 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003885 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003886 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003887 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003888 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003889 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003890 }
3891 *res++ = *fmt++;
3892 }
3893 else {
3894 /* Got a format specifier */
3895 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003896 int width = -1;
3897 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003898 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003899 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003900 PyObject *v = NULL;
3901 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003902 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003903 int sign;
3904 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003905 char formatbuf[FORMATBUFLEN];
3906 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003907#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003908 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003909 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003910#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003911
Guido van Rossumda9c2711996-12-05 21:58:58 +00003912 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003913 if (*fmt == '(') {
3914 char *keystart;
3915 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003916 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003917 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003918
3919 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003920 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003921 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003922 goto error;
3923 }
3924 ++fmt;
3925 --fmtcnt;
3926 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003927 /* Skip over balanced parentheses */
3928 while (pcount > 0 && --fmtcnt >= 0) {
3929 if (*fmt == ')')
3930 --pcount;
3931 else if (*fmt == '(')
3932 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003933 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003934 }
3935 keylen = fmt - keystart - 1;
3936 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003937 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003938 "incomplete format key");
3939 goto error;
3940 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003941 key = PyString_FromStringAndSize(keystart,
3942 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003943 if (key == NULL)
3944 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003945 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003946 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003947 args_owned = 0;
3948 }
3949 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003950 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003951 if (args == NULL) {
3952 goto error;
3953 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003954 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003955 arglen = -1;
3956 argidx = -2;
3957 }
Guido van Rossume5372401993-03-16 12:15:04 +00003958 while (--fmtcnt >= 0) {
3959 switch (c = *fmt++) {
3960 case '-': flags |= F_LJUST; continue;
3961 case '+': flags |= F_SIGN; continue;
3962 case ' ': flags |= F_BLANK; continue;
3963 case '#': flags |= F_ALT; continue;
3964 case '0': flags |= F_ZERO; continue;
3965 }
3966 break;
3967 }
3968 if (c == '*') {
3969 v = getnextarg(args, arglen, &argidx);
3970 if (v == NULL)
3971 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003972 if (!PyInt_Check(v)) {
3973 PyErr_SetString(PyExc_TypeError,
3974 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003975 goto error;
3976 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003977 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003978 if (width < 0) {
3979 flags |= F_LJUST;
3980 width = -width;
3981 }
Guido van Rossume5372401993-03-16 12:15:04 +00003982 if (--fmtcnt >= 0)
3983 c = *fmt++;
3984 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003985 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003986 width = c - '0';
3987 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003988 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003989 if (!isdigit(c))
3990 break;
3991 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003992 PyErr_SetString(
3993 PyExc_ValueError,
3994 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003995 goto error;
3996 }
3997 width = width*10 + (c - '0');
3998 }
3999 }
4000 if (c == '.') {
4001 prec = 0;
4002 if (--fmtcnt >= 0)
4003 c = *fmt++;
4004 if (c == '*') {
4005 v = getnextarg(args, arglen, &argidx);
4006 if (v == NULL)
4007 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004008 if (!PyInt_Check(v)) {
4009 PyErr_SetString(
4010 PyExc_TypeError,
4011 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004012 goto error;
4013 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004014 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004015 if (prec < 0)
4016 prec = 0;
4017 if (--fmtcnt >= 0)
4018 c = *fmt++;
4019 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004020 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004021 prec = c - '0';
4022 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004023 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004024 if (!isdigit(c))
4025 break;
4026 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004027 PyErr_SetString(
4028 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004029 "prec too big");
4030 goto error;
4031 }
4032 prec = prec*10 + (c - '0');
4033 }
4034 }
4035 } /* prec */
4036 if (fmtcnt >= 0) {
4037 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004038 if (--fmtcnt >= 0)
4039 c = *fmt++;
4040 }
4041 }
4042 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004043 PyErr_SetString(PyExc_ValueError,
4044 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004045 goto error;
4046 }
4047 if (c != '%') {
4048 v = getnextarg(args, arglen, &argidx);
4049 if (v == NULL)
4050 goto error;
4051 }
4052 sign = 0;
4053 fill = ' ';
4054 switch (c) {
4055 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004056 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004057 len = 1;
4058 break;
4059 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004060#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004061 if (PyUnicode_Check(v)) {
4062 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004063 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004064 goto unicode;
4065 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004066#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004067 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004068 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004069 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004070 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004071 else
4072 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004073 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004074 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004075 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004076 /* XXX Note: this should never happen,
4077 since PyObject_Repr() and
4078 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004079 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004080 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004081 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004082 goto error;
4083 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004084 pbuf = PyString_AS_STRING(temp);
4085 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004086 if (prec >= 0 && len > prec)
4087 len = prec;
4088 break;
4089 case 'i':
4090 case 'd':
4091 case 'u':
4092 case 'o':
4093 case 'x':
4094 case 'X':
4095 if (c == 'i')
4096 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004097 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004098 temp = _PyString_FormatLong(v, flags,
4099 prec, c, &pbuf, &len);
4100 if (!temp)
4101 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004102 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004103 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004104 else {
4105 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004106 len = formatint(pbuf,
4107 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004108 flags, prec, c, v);
4109 if (len < 0)
4110 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004111 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004112 }
4113 if (flags & F_ZERO)
4114 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004115 break;
4116 case 'e':
4117 case 'E':
4118 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004119 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004120 case 'g':
4121 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004122 if (c == 'F')
4123 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004124 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004125 len = formatfloat(pbuf, sizeof(formatbuf),
4126 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004127 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004128 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004129 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004130 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004131 fill = '0';
4132 break;
4133 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004134#ifdef Py_USING_UNICODE
4135 if (PyUnicode_Check(v)) {
4136 fmt = fmt_start;
4137 argidx = argidx_start;
4138 goto unicode;
4139 }
4140#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004141 pbuf = formatbuf;
4142 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004143 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004144 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004145 break;
4146 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004147 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004148 "unsupported format character '%c' (0x%x) "
4149 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004150 c, c,
4151 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004152 goto error;
4153 }
4154 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004155 if (*pbuf == '-' || *pbuf == '+') {
4156 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004157 len--;
4158 }
4159 else if (flags & F_SIGN)
4160 sign = '+';
4161 else if (flags & F_BLANK)
4162 sign = ' ';
4163 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004164 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004165 }
4166 if (width < len)
4167 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004168 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004169 reslen -= rescnt;
4170 rescnt = width + fmtcnt + 100;
4171 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004172 if (reslen < 0) {
4173 Py_DECREF(result);
4174 return PyErr_NoMemory();
4175 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004176 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004177 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004178 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004179 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004180 }
4181 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004182 if (fill != ' ')
4183 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004184 rescnt--;
4185 if (width > len)
4186 width--;
4187 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004188 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4189 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004190 assert(pbuf[1] == c);
4191 if (fill != ' ') {
4192 *res++ = *pbuf++;
4193 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004194 }
Tim Petersfff53252001-04-12 18:38:48 +00004195 rescnt -= 2;
4196 width -= 2;
4197 if (width < 0)
4198 width = 0;
4199 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004200 }
4201 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004202 do {
4203 --rescnt;
4204 *res++ = fill;
4205 } while (--width > len);
4206 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004207 if (fill == ' ') {
4208 if (sign)
4209 *res++ = sign;
4210 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004211 (c == 'x' || c == 'X')) {
4212 assert(pbuf[0] == '0');
4213 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004214 *res++ = *pbuf++;
4215 *res++ = *pbuf++;
4216 }
4217 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004218 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004219 res += len;
4220 rescnt -= len;
4221 while (--width >= len) {
4222 --rescnt;
4223 *res++ = ' ';
4224 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004225 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004226 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004227 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004228 goto error;
4229 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004230 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004231 } /* '%' */
4232 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004233 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004234 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004235 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004236 goto error;
4237 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004238 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004239 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004240 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004241 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004242 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004243
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004244#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004245 unicode:
4246 if (args_owned) {
4247 Py_DECREF(args);
4248 args_owned = 0;
4249 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004250 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004251 if (PyTuple_Check(orig_args) && argidx > 0) {
4252 PyObject *v;
4253 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4254 v = PyTuple_New(n);
4255 if (v == NULL)
4256 goto error;
4257 while (--n >= 0) {
4258 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4259 Py_INCREF(w);
4260 PyTuple_SET_ITEM(v, n, w);
4261 }
4262 args = v;
4263 } else {
4264 Py_INCREF(orig_args);
4265 args = orig_args;
4266 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004267 args_owned = 1;
4268 /* Take what we have of the result and let the Unicode formatting
4269 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004270 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004271 if (_PyString_Resize(&result, rescnt))
4272 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004273 fmtcnt = PyString_GET_SIZE(format) - \
4274 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004275 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4276 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004277 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004278 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004279 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004280 if (v == NULL)
4281 goto error;
4282 /* Paste what we have (result) to what the Unicode formatting
4283 function returned (v) and return the result (or error) */
4284 w = PyUnicode_Concat(result, v);
4285 Py_DECREF(result);
4286 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004287 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004288 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004289#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004290
Guido van Rossume5372401993-03-16 12:15:04 +00004291 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004292 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004293 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004294 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004295 }
Guido van Rossume5372401993-03-16 12:15:04 +00004296 return NULL;
4297}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004298
Guido van Rossum2a61e741997-01-18 07:55:05 +00004299void
Fred Drakeba096332000-07-09 07:04:36 +00004300PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004301{
4302 register PyStringObject *s = (PyStringObject *)(*p);
4303 PyObject *t;
4304 if (s == NULL || !PyString_Check(s))
4305 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004306 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004307 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004308 if (interned == NULL) {
4309 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004310 if (interned == NULL) {
4311 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004312 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004313 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004314 }
4315 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4316 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004317 Py_DECREF(*p);
4318 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004319 return;
4320 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004321 /* Ensure that only true string objects appear in the intern dict */
4322 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004323 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4324 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004325 if (t == NULL) {
4326 PyErr_Clear();
4327 return;
Tim Peters111f6092001-09-12 07:54:51 +00004328 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004329 } else {
4330 t = (PyObject*) s;
4331 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004332 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004333
4334 if (PyDict_SetItem(interned, t, t) == 0) {
4335 /* The two references in interned are not counted by
4336 refcnt. The string deallocator will take care of this */
4337 ((PyObject *)t)->ob_refcnt-=2;
4338 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4339 Py_DECREF(*p);
4340 *p = t;
4341 return;
4342 }
4343 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004344 PyErr_Clear();
4345}
4346
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004347void
4348PyString_InternImmortal(PyObject **p)
4349{
4350 PyString_InternInPlace(p);
4351 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4352 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4353 Py_INCREF(*p);
4354 }
4355}
4356
Guido van Rossum2a61e741997-01-18 07:55:05 +00004357
4358PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004359PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004360{
4361 PyObject *s = PyString_FromString(cp);
4362 if (s == NULL)
4363 return NULL;
4364 PyString_InternInPlace(&s);
4365 return s;
4366}
4367
Guido van Rossum8cf04761997-08-02 02:57:45 +00004368void
Fred Drakeba096332000-07-09 07:04:36 +00004369PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004370{
4371 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004372 for (i = 0; i < UCHAR_MAX + 1; i++) {
4373 Py_XDECREF(characters[i]);
4374 characters[i] = NULL;
4375 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004376 Py_XDECREF(nullstring);
4377 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004378}
Barry Warsawa903ad982001-02-23 16:40:48 +00004379
Barry Warsawa903ad982001-02-23 16:40:48 +00004380void _Py_ReleaseInternedStrings(void)
4381{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004382 PyObject *keys;
4383 PyStringObject *s;
4384 int i, n;
4385
4386 if (interned == NULL || !PyDict_Check(interned))
4387 return;
4388 keys = PyDict_Keys(interned);
4389 if (keys == NULL || !PyList_Check(keys)) {
4390 PyErr_Clear();
4391 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004392 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004393
4394 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4395 detector, interned strings are not forcibly deallocated; rather, we
4396 give them their stolen references back, and then clear and DECREF
4397 the interned dict. */
4398
4399 fprintf(stderr, "releasing interned strings\n");
4400 n = PyList_GET_SIZE(keys);
4401 for (i = 0; i < n; i++) {
4402 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4403 switch (s->ob_sstate) {
4404 case SSTATE_NOT_INTERNED:
4405 /* XXX Shouldn't happen */
4406 break;
4407 case SSTATE_INTERNED_IMMORTAL:
4408 s->ob_refcnt += 1;
4409 break;
4410 case SSTATE_INTERNED_MORTAL:
4411 s->ob_refcnt += 2;
4412 break;
4413 default:
4414 Py_FatalError("Inconsistent interned string state.");
4415 }
4416 s->ob_sstate = SSTATE_NOT_INTERNED;
4417 }
4418 Py_DECREF(keys);
4419 PyDict_Clear(interned);
4420 Py_DECREF(interned);
4421 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004422}