blob: c87b68800941adcdb46967a657a1204f2492d175 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromStringAndSize(), the parameter the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000072 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000073 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000074 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000076 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000078 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (str != NULL)
80 memcpy(op->ob_sval, str, size);
81 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000082 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000090 PyObject *t = (PyObject *)op;
91 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000092 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097}
98
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000100PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000101{
Tim Peters62de65b2001-12-06 20:29:32 +0000102 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000103 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000104
105 assert(str != NULL);
106 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000107 if (size > INT_MAX) {
108 PyErr_SetString(PyExc_OverflowError,
109 "string is too long for a Python string");
110 return NULL;
111 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0 && (op = nullstring) != NULL) {
113#ifdef COUNT_ALLOCS
114 null_strings++;
115#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000116 Py_INCREF(op);
117 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
119 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
120#ifdef COUNT_ALLOCS
121 one_strings++;
122#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000127 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000128 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000133 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000134 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000135 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000137 PyObject *t = (PyObject *)op;
138 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000139 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Barry Warsawdadace02001-08-24 18:32:06 +0000152PyObject *
153PyString_FromFormatV(const char *format, va_list vargs)
154{
Tim Petersc15c4f12001-10-02 21:32:07 +0000155 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 int n = 0;
157 const char* f;
158 char *s;
159 PyObject* string;
160
Tim Petersc15c4f12001-10-02 21:32:07 +0000161#ifdef VA_LIST_IS_ARRAY
162 memcpy(count, vargs, sizeof(va_list));
163#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000164#ifdef __va_copy
165 __va_copy(count, vargs);
166#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000167 count = vargs;
168#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000169#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000170 /* step 1: figure out how large a buffer we need */
171 for (f = format; *f; f++) {
172 if (*f == '%') {
173 const char* p = f;
174 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
175 ;
176
177 /* skip the 'l' in %ld, since it doesn't change the
178 width. although only %d is supported (see
179 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000180 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000181 if (*f == 'l' && *(f+1) == 'd')
182 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000183
Barry Warsawdadace02001-08-24 18:32:06 +0000184 switch (*f) {
185 case 'c':
186 (void)va_arg(count, int);
187 /* fall through... */
188 case '%':
189 n++;
190 break;
191 case 'd': case 'i': case 'x':
192 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000193 /* 20 bytes is enough to hold a 64-bit
194 integer. Decimal takes the most space.
195 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000196 n += 20;
197 break;
198 case 's':
199 s = va_arg(count, char*);
200 n += strlen(s);
201 break;
202 case 'p':
203 (void) va_arg(count, int);
204 /* maximum 64-bit pointer representation:
205 * 0xffffffffffffffff
206 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000217 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000226 /* Since we've analyzed how much space we need for the worst case,
227 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000228 string = PyString_FromStringAndSize(NULL, n);
229 if (!string)
230 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231
Barry Warsawdadace02001-08-24 18:32:06 +0000232 s = PyString_AsString(string);
233
234 for (f = format; *f; f++) {
235 if (*f == '%') {
236 const char* p = f++;
237 int i, longflag = 0;
238 /* parse the width.precision part (we're only
239 interested in the precision value, if any) */
240 n = 0;
241 while (isdigit(Py_CHARMASK(*f)))
242 n = (n*10) + *f++ - '0';
243 if (*f == '.') {
244 f++;
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 }
249 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
250 f++;
251 /* handle the long flag, but only for %ld. others
252 can be added when necessary. */
253 if (*f == 'l' && *(f+1) == 'd') {
254 longflag = 1;
255 ++f;
256 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000257
Barry Warsawdadace02001-08-24 18:32:06 +0000258 switch (*f) {
259 case 'c':
260 *s++ = va_arg(vargs, int);
261 break;
262 case 'd':
263 if (longflag)
264 sprintf(s, "%ld", va_arg(vargs, long));
265 else
266 sprintf(s, "%d", va_arg(vargs, int));
267 s += strlen(s);
268 break;
269 case 'i':
270 sprintf(s, "%i", va_arg(vargs, int));
271 s += strlen(s);
272 break;
273 case 'x':
274 sprintf(s, "%x", va_arg(vargs, int));
275 s += strlen(s);
276 break;
277 case 's':
278 p = va_arg(vargs, char*);
279 i = strlen(p);
280 if (n > 0 && i > n)
281 i = n;
282 memcpy(s, p, i);
283 s += i;
284 break;
285 case 'p':
286 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000287 /* %p is ill-defined: ensure leading 0x. */
288 if (s[1] == 'X')
289 s[1] = 'x';
290 else if (s[1] != 'x') {
291 memmove(s+2, s, strlen(s)+1);
292 s[0] = '0';
293 s[1] = 'x';
294 }
Barry Warsawdadace02001-08-24 18:32:06 +0000295 s += strlen(s);
296 break;
297 case '%':
298 *s++ = '%';
299 break;
300 default:
301 strcpy(s, p);
302 s += strlen(s);
303 goto end;
304 }
305 } else
306 *s++ = *f;
307 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000308
Barry Warsawdadace02001-08-24 18:32:06 +0000309 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000311 return string;
312}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000313
Barry Warsawdadace02001-08-24 18:32:06 +0000314PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000315PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000316{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000317 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000318 va_list vargs;
319
320#ifdef HAVE_STDARG_PROTOTYPES
321 va_start(vargs, format);
322#else
323 va_start(vargs);
324#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000325 ret = PyString_FromFormatV(format, vargs);
326 va_end(vargs);
327 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000328}
329
330
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000331PyObject *PyString_Decode(const char *s,
332 int size,
333 const char *encoding,
334 const char *errors)
335{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000336 PyObject *v, *str;
337
338 str = PyString_FromStringAndSize(s, size);
339 if (str == NULL)
340 return NULL;
341 v = PyString_AsDecodedString(str, encoding, errors);
342 Py_DECREF(str);
343 return v;
344}
345
346PyObject *PyString_AsDecodedObject(PyObject *str,
347 const char *encoding,
348 const char *errors)
349{
350 PyObject *v;
351
352 if (!PyString_Check(str)) {
353 PyErr_BadArgument();
354 goto onError;
355 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000356
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000357 if (encoding == NULL) {
358#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000360#else
361 PyErr_SetString(PyExc_ValueError, "no encoding specified");
362 goto onError;
363#endif
364 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365
366 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000367 v = PyCodec_Decode(str, encoding, errors);
368 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000370
371 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000372
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000373 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000374 return NULL;
375}
376
377PyObject *PyString_AsDecodedString(PyObject *str,
378 const char *encoding,
379 const char *errors)
380{
381 PyObject *v;
382
383 v = PyString_AsDecodedObject(str, encoding, errors);
384 if (v == NULL)
385 goto onError;
386
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 /* Convert Unicode to a string using the default encoding */
389 if (PyUnicode_Check(v)) {
390 PyObject *temp = v;
391 v = PyUnicode_AsEncodedString(v, NULL, NULL);
392 Py_DECREF(temp);
393 if (v == NULL)
394 goto onError;
395 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 if (!PyString_Check(v)) {
398 PyErr_Format(PyExc_TypeError,
399 "decoder did not return a string object (type=%.400s)",
400 v->ob_type->tp_name);
401 Py_DECREF(v);
402 goto onError;
403 }
404
405 return v;
406
407 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000408 return NULL;
409}
410
411PyObject *PyString_Encode(const char *s,
412 int size,
413 const char *encoding,
414 const char *errors)
415{
416 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000417
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 str = PyString_FromStringAndSize(s, size);
419 if (str == NULL)
420 return NULL;
421 v = PyString_AsEncodedString(str, encoding, errors);
422 Py_DECREF(str);
423 return v;
424}
425
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000426PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 const char *encoding,
428 const char *errors)
429{
430 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000431
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 if (!PyString_Check(str)) {
433 PyErr_BadArgument();
434 goto onError;
435 }
436
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000437 if (encoding == NULL) {
438#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000439 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000440#else
441 PyErr_SetString(PyExc_ValueError, "no encoding specified");
442 goto onError;
443#endif
444 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445
446 /* Encode via the codec registry */
447 v = PyCodec_Encode(str, encoding, errors);
448 if (v == NULL)
449 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450
451 return v;
452
453 onError:
454 return NULL;
455}
456
457PyObject *PyString_AsEncodedString(PyObject *str,
458 const char *encoding,
459 const char *errors)
460{
461 PyObject *v;
462
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000463 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000464 if (v == NULL)
465 goto onError;
466
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 /* Convert Unicode to a string using the default encoding */
469 if (PyUnicode_Check(v)) {
470 PyObject *temp = v;
471 v = PyUnicode_AsEncodedString(v, NULL, NULL);
472 Py_DECREF(temp);
473 if (v == NULL)
474 goto onError;
475 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000477 if (!PyString_Check(v)) {
478 PyErr_Format(PyExc_TypeError,
479 "encoder did not return a string object (type=%.400s)",
480 v->ob_type->tp_name);
481 Py_DECREF(v);
482 goto onError;
483 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000484
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000485 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 onError:
488 return NULL;
489}
490
Guido van Rossum234f9421993-06-17 12:35:49 +0000491static void
Fred Drakeba096332000-07-09 07:04:36 +0000492string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000493{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000494 switch (PyString_CHECK_INTERNED(op)) {
495 case SSTATE_NOT_INTERNED:
496 break;
497
498 case SSTATE_INTERNED_MORTAL:
499 /* revive dead object temporarily for DelItem */
500 op->ob_refcnt = 3;
501 if (PyDict_DelItem(interned, op) != 0)
502 Py_FatalError(
503 "deletion of interned string failed");
504 break;
505
506 case SSTATE_INTERNED_IMMORTAL:
507 Py_FatalError("Immortal interned string died.");
508
509 default:
510 Py_FatalError("Inconsistent interned string state.");
511 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000512 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000513}
514
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000515/* Unescape a backslash-escaped string. If unicode is non-zero,
516 the string is a u-literal. If recode_encoding is non-zero,
517 the string is UTF-8 encoded and should be re-encoded in the
518 specified encoding. */
519
520PyObject *PyString_DecodeEscape(const char *s,
521 int len,
522 const char *errors,
523 int unicode,
524 const char *recode_encoding)
525{
526 int c;
527 char *p, *buf;
528 const char *end;
529 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000530 int newlen = recode_encoding ? 4*len:len;
531 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 if (v == NULL)
533 return NULL;
534 p = buf = PyString_AsString(v);
535 end = s + len;
536 while (s < end) {
537 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000538 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539#ifdef Py_USING_UNICODE
540 if (recode_encoding && (*s & 0x80)) {
541 PyObject *u, *w;
542 char *r;
543 const char* t;
544 int rn;
545 t = s;
546 /* Decode non-ASCII bytes as UTF-8. */
547 while (t < end && (*t & 0x80)) t++;
548 u = PyUnicode_DecodeUTF8(s, t - s, errors);
549 if(!u) goto failed;
550
551 /* Recode them in target encoding. */
552 w = PyUnicode_AsEncodedString(
553 u, recode_encoding, errors);
554 Py_DECREF(u);
555 if (!w) goto failed;
556
557 /* Append bytes to output buffer. */
558 r = PyString_AsString(w);
559 rn = PyString_Size(w);
560 memcpy(p, r, rn);
561 p += rn;
562 Py_DECREF(w);
563 s = t;
564 } else {
565 *p++ = *s++;
566 }
567#else
568 *p++ = *s++;
569#endif
570 continue;
571 }
572 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000573 if (s==end) {
574 PyErr_SetString(PyExc_ValueError,
575 "Trailing \\ in string");
576 goto failed;
577 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000578 switch (*s++) {
579 /* XXX This assumes ASCII! */
580 case '\n': break;
581 case '\\': *p++ = '\\'; break;
582 case '\'': *p++ = '\''; break;
583 case '\"': *p++ = '\"'; break;
584 case 'b': *p++ = '\b'; break;
585 case 'f': *p++ = '\014'; break; /* FF */
586 case 't': *p++ = '\t'; break;
587 case 'n': *p++ = '\n'; break;
588 case 'r': *p++ = '\r'; break;
589 case 'v': *p++ = '\013'; break; /* VT */
590 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
591 case '0': case '1': case '2': case '3':
592 case '4': case '5': case '6': case '7':
593 c = s[-1] - '0';
594 if ('0' <= *s && *s <= '7') {
595 c = (c<<3) + *s++ - '0';
596 if ('0' <= *s && *s <= '7')
597 c = (c<<3) + *s++ - '0';
598 }
599 *p++ = c;
600 break;
601 case 'x':
602 if (isxdigit(Py_CHARMASK(s[0]))
603 && isxdigit(Py_CHARMASK(s[1]))) {
604 unsigned int x = 0;
605 c = Py_CHARMASK(*s);
606 s++;
607 if (isdigit(c))
608 x = c - '0';
609 else if (islower(c))
610 x = 10 + c - 'a';
611 else
612 x = 10 + c - 'A';
613 x = x << 4;
614 c = Py_CHARMASK(*s);
615 s++;
616 if (isdigit(c))
617 x += c - '0';
618 else if (islower(c))
619 x += 10 + c - 'a';
620 else
621 x += 10 + c - 'A';
622 *p++ = x;
623 break;
624 }
625 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 PyErr_SetString(PyExc_ValueError,
627 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000628 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 }
630 if (strcmp(errors, "replace") == 0) {
631 *p++ = '?';
632 } else if (strcmp(errors, "ignore") == 0)
633 /* do nothing */;
634 else {
635 PyErr_Format(PyExc_ValueError,
636 "decoding error; "
637 "unknown error handling code: %.400s",
638 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000639 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 }
641#ifndef Py_USING_UNICODE
642 case 'u':
643 case 'U':
644 case 'N':
645 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000646 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 "Unicode escapes not legal "
648 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000649 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000650 }
651#endif
652 default:
653 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000654 s--;
655 goto non_esc; /* an arbitry number of unescaped
656 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 }
658 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000659 if (p-buf < newlen)
660 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000661 return v;
662 failed:
663 Py_DECREF(v);
664 return NULL;
665}
666
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667static int
668string_getsize(register PyObject *op)
669{
670 char *s;
671 int len;
672 if (PyString_AsStringAndSize(op, &s, &len))
673 return -1;
674 return len;
675}
676
677static /*const*/ char *
678string_getbuffer(register PyObject *op)
679{
680 char *s;
681 int len;
682 if (PyString_AsStringAndSize(op, &s, &len))
683 return NULL;
684 return s;
685}
686
Guido van Rossumd7047b31995-01-02 19:07:15 +0000687int
Fred Drakeba096332000-07-09 07:04:36 +0000688PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000690 if (!PyString_Check(op))
691 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693}
694
695/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000696PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (!PyString_Check(op))
699 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000700 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701}
702
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000703int
704PyString_AsStringAndSize(register PyObject *obj,
705 register char **s,
706 register int *len)
707{
708 if (s == NULL) {
709 PyErr_BadInternalCall();
710 return -1;
711 }
712
713 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000714#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715 if (PyUnicode_Check(obj)) {
716 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
717 if (obj == NULL)
718 return -1;
719 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000720 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000721#endif
722 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723 PyErr_Format(PyExc_TypeError,
724 "expected string or Unicode object, "
725 "%.200s found", obj->ob_type->tp_name);
726 return -1;
727 }
728 }
729
730 *s = PyString_AS_STRING(obj);
731 if (len != NULL)
732 *len = PyString_GET_SIZE(obj);
733 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
734 PyErr_SetString(PyExc_TypeError,
735 "expected string without null bytes");
736 return -1;
737 }
738 return 0;
739}
740
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741/* Methods */
742
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000743static int
Fred Drakeba096332000-07-09 07:04:36 +0000744string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745{
746 int i;
747 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000748 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000749
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000750 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000751 if (! PyString_CheckExact(op)) {
752 int ret;
753 /* A str subclass may have its own __str__ method. */
754 op = (PyStringObject *) PyObject_Str((PyObject *)op);
755 if (op == NULL)
756 return -1;
757 ret = string_print(op, fp, flags);
758 Py_DECREF(op);
759 return ret;
760 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000761 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000762#ifdef __VMS
763 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
764#else
765 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
766#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000767 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000769
Thomas Wouters7e474022000-07-16 12:04:32 +0000770 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000772 if (memchr(op->ob_sval, '\'', op->ob_size) &&
773 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000774 quote = '"';
775
776 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 for (i = 0; i < op->ob_size; i++) {
778 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000779 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000780 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000781 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\r");
787 else if (c < ' ' || c >= 0x7f)
788 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000789 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794}
795
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000796PyObject *
797PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000799 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000800 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000801 PyObject *v;
802 if (newsize > INT_MAX) {
803 PyErr_SetString(PyExc_OverflowError,
804 "string is too large to make repr");
805 }
806 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000808 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 }
810 else {
811 register int i;
812 register char c;
813 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 int quote;
815
Thomas Wouters7e474022000-07-16 12:04:32 +0000816 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000818 if (smartquotes &&
819 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000820 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000821 quote = '"';
822
Tim Peters9161c8b2001-12-03 01:55:38 +0000823 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000826 /* There's at least enough room for a hex escape
827 and a closing quote. */
828 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000832 else if (c == '\t')
833 *p++ = '\\', *p++ = 't';
834 else if (c == '\n')
835 *p++ = '\\', *p++ = 'n';
836 else if (c == '\r')
837 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000838 else if (c < ' ' || c >= 0x7f) {
839 /* For performance, we don't want to call
840 PyOS_snprintf here (extra layers of
841 function call). */
842 sprintf(p, "\\x%02x", c & 0xff);
843 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000844 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 else
846 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000848 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000851 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000852 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000853 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855}
856
Guido van Rossum189f1df2001-05-01 16:51:53 +0000857static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858string_repr(PyObject *op)
859{
860 return PyString_Repr(op, 1);
861}
862
863static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000864string_str(PyObject *s)
865{
Tim Petersc9933152001-10-16 20:18:24 +0000866 assert(PyString_Check(s));
867 if (PyString_CheckExact(s)) {
868 Py_INCREF(s);
869 return s;
870 }
871 else {
872 /* Subtype -- return genuine string with the same value. */
873 PyStringObject *t = (PyStringObject *) s;
874 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
875 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000876}
877
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878static int
Fred Drakeba096332000-07-09 07:04:36 +0000879string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880{
881 return a->ob_size;
882}
883
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000884static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000885string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000888 register PyStringObject *op;
889 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000890#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 if (PyUnicode_Check(bb))
892 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000893#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000894 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000895 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000896 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 return NULL;
898 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000899#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000901 if ((a->ob_size == 0 || b->ob_size == 0) &&
902 PyString_CheckExact(a) && PyString_CheckExact(b)) {
903 if (a->ob_size == 0) {
904 Py_INCREF(bb);
905 return bb;
906 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 Py_INCREF(a);
908 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
910 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000911 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000912 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000915 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000917 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
919 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
920 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922#undef b
923}
924
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000926string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000929 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000930 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000932 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933 if (n < 0)
934 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000935 /* watch out for overflows: the size can overflow int,
936 * and the # of bytes needed can overflow size_t
937 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000938 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000939 if (n && size / n != a->ob_size) {
940 PyErr_SetString(PyExc_OverflowError,
941 "repeated string is too long");
942 return NULL;
943 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000944 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 Py_INCREF(a);
946 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947 }
Tim Peterse7c05322004-06-27 17:24:49 +0000948 nbytes = (size_t)size;
949 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000950 PyErr_SetString(PyExc_OverflowError,
951 "repeated string is too long");
952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000955 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000961 op->ob_sval[size] = '\0';
962 if (a->ob_size == 1 && n > 0) {
963 memset(op->ob_sval, a->ob_sval[0] , n);
964 return (PyObject *) op;
965 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000966 i = 0;
967 if (i < size) {
968 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
969 i = (int) a->ob_size;
970 }
971 while (i < size) {
972 j = (i <= size-i) ? i : size-i;
973 memcpy(op->ob_sval+i, op->ob_sval, j);
974 i += j;
975 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977}
978
979/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
980
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000982string_slice(register PyStringObject *a, register int i, register int j)
983 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984{
985 if (i < 0)
986 i = 0;
987 if (j < 0)
988 j = 0; /* Avoid signed/unsigned bug in next line */
989 if (j > a->ob_size)
990 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000991 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
992 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 Py_INCREF(a);
994 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000995 }
996 if (j < i)
997 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999}
1000
Guido van Rossum9284a572000-03-07 15:53:43 +00001001static int
Fred Drakeba096332000-07-09 07:04:36 +00001002string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001003{
Barry Warsaw817918c2002-08-06 16:58:21 +00001004 const char *lhs, *rhs, *end;
1005 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001006
1007 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001008#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001009 if (PyUnicode_Check(el))
1010 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001011#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001012 if (!PyString_Check(el)) {
1013 PyErr_SetString(PyExc_TypeError,
1014 "'in <string>' requires string as left operand");
1015 return -1;
1016 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001017 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001018 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001019 rhs = PyString_AS_STRING(el);
1020 lhs = PyString_AS_STRING(a);
1021
1022 /* optimize for a single character */
1023 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001024 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001025
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 while (lhs <= end) {
1028 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001029 return 1;
1030 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001031
Guido van Rossum9284a572000-03-07 15:53:43 +00001032 return 0;
1033}
1034
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001036string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001037{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001039 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001041 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042 return NULL;
1043 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001044 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001045 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001046 if (v == NULL)
1047 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001048 else {
1049#ifdef COUNT_ALLOCS
1050 one_strings++;
1051#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001052 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001053 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001054 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001055}
1056
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057static PyObject*
1058string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001060 int c;
1061 int len_a, len_b;
1062 int min_len;
1063 PyObject *result;
1064
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001065 /* Make sure both arguments are strings. */
1066 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001067 result = Py_NotImplemented;
1068 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001069 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001070 if (a == b) {
1071 switch (op) {
1072 case Py_EQ:case Py_LE:case Py_GE:
1073 result = Py_True;
1074 goto out;
1075 case Py_NE:case Py_LT:case Py_GT:
1076 result = Py_False;
1077 goto out;
1078 }
1079 }
1080 if (op == Py_EQ) {
1081 /* Supporting Py_NE here as well does not save
1082 much time, since Py_NE is rarely used. */
1083 if (a->ob_size == b->ob_size
1084 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001085 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001086 a->ob_size) == 0)) {
1087 result = Py_True;
1088 } else {
1089 result = Py_False;
1090 }
1091 goto out;
1092 }
1093 len_a = a->ob_size; len_b = b->ob_size;
1094 min_len = (len_a < len_b) ? len_a : len_b;
1095 if (min_len > 0) {
1096 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1097 if (c==0)
1098 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1099 }else
1100 c = 0;
1101 if (c == 0)
1102 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1103 switch (op) {
1104 case Py_LT: c = c < 0; break;
1105 case Py_LE: c = c <= 0; break;
1106 case Py_EQ: assert(0); break; /* unreachable */
1107 case Py_NE: c = c != 0; break;
1108 case Py_GT: c = c > 0; break;
1109 case Py_GE: c = c >= 0; break;
1110 default:
1111 result = Py_NotImplemented;
1112 goto out;
1113 }
1114 result = c ? Py_True : Py_False;
1115 out:
1116 Py_INCREF(result);
1117 return result;
1118}
1119
1120int
1121_PyString_Eq(PyObject *o1, PyObject *o2)
1122{
1123 PyStringObject *a, *b;
1124 a = (PyStringObject*)o1;
1125 b = (PyStringObject*)o2;
1126 return a->ob_size == b->ob_size
1127 && *a->ob_sval == *b->ob_sval
1128 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001129}
1130
Guido van Rossum9bfef441993-03-29 10:43:31 +00001131static long
Fred Drakeba096332000-07-09 07:04:36 +00001132string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001133{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001134 register int len;
1135 register unsigned char *p;
1136 register long x;
1137
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001138 if (a->ob_shash != -1)
1139 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001140 len = a->ob_size;
1141 p = (unsigned char *) a->ob_sval;
1142 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001143 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001144 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145 x ^= a->ob_size;
1146 if (x == -1)
1147 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001149 return x;
1150}
1151
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001152static PyObject*
1153string_subscript(PyStringObject* self, PyObject* item)
1154{
1155 if (PyInt_Check(item)) {
1156 long i = PyInt_AS_LONG(item);
1157 if (i < 0)
1158 i += PyString_GET_SIZE(self);
1159 return string_item(self,i);
1160 }
1161 else if (PyLong_Check(item)) {
1162 long i = PyLong_AsLong(item);
1163 if (i == -1 && PyErr_Occurred())
1164 return NULL;
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PySlice_Check(item)) {
1170 int start, stop, step, slicelength, cur, i;
1171 char* source_buf;
1172 char* result_buf;
1173 PyObject* result;
1174
1175 if (PySlice_GetIndicesEx((PySliceObject*)item,
1176 PyString_GET_SIZE(self),
1177 &start, &stop, &step, &slicelength) < 0) {
1178 return NULL;
1179 }
1180
1181 if (slicelength <= 0) {
1182 return PyString_FromStringAndSize("", 0);
1183 }
1184 else {
1185 source_buf = PyString_AsString((PyObject*)self);
1186 result_buf = PyMem_Malloc(slicelength);
1187
1188 for (cur = start, i = 0; i < slicelength;
1189 cur += step, i++) {
1190 result_buf[i] = source_buf[cur];
1191 }
1192
1193 result = PyString_FromStringAndSize(result_buf,
1194 slicelength);
1195 PyMem_Free(result_buf);
1196 return result;
1197 }
1198 }
1199 else {
1200 PyErr_SetString(PyExc_TypeError,
1201 "string indices must be integers");
1202 return NULL;
1203 }
1204}
1205
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001206static int
Fred Drakeba096332000-07-09 07:04:36 +00001207string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001208{
1209 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001210 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001211 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212 return -1;
1213 }
1214 *ptr = (void *)self->ob_sval;
1215 return self->ob_size;
1216}
1217
1218static int
Fred Drakeba096332000-07-09 07:04:36 +00001219string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220{
Guido van Rossum045e6881997-09-08 18:30:11 +00001221 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001222 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001223 return -1;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
1229 if ( lenp )
1230 *lenp = self->ob_size;
1231 return 1;
1232}
1233
Guido van Rossum1db70701998-10-08 02:18:52 +00001234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001236{
1237 if ( index != 0 ) {
1238 PyErr_SetString(PyExc_SystemError,
1239 "accessing non-existent string segment");
1240 return -1;
1241 }
1242 *ptr = self->ob_sval;
1243 return self->ob_size;
1244}
1245
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001246static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001247 (inquiry)string_length, /*sq_length*/
1248 (binaryfunc)string_concat, /*sq_concat*/
1249 (intargfunc)string_repeat, /*sq_repeat*/
1250 (intargfunc)string_item, /*sq_item*/
1251 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001252 0, /*sq_ass_item*/
1253 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001254 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001255};
1256
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001257static PyMappingMethods string_as_mapping = {
1258 (inquiry)string_length,
1259 (binaryfunc)string_subscript,
1260 0,
1261};
1262
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263static PyBufferProcs string_as_buffer = {
1264 (getreadbufferproc)string_buffer_getreadbuf,
1265 (getwritebufferproc)string_buffer_getwritebuf,
1266 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001267 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268};
1269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270
1271
/* Selectors naming the three strip variants; they double as indexes
   into stripformat below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings (presumably for argument parsing -- confirm at the
   call sites), indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The method name for selector i: its format string with the
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001280
/* Helpers for the split routines.  Both build a string from
   data[left:right] and add it to the result list, jumping to the
   caller's `onError` label on failure.  They rely on the enclosing
   function declaring `PyObject *str`, `PyObject *list` and an `onError`
   label.  The temporary reference in `str` is released on every path.

   Each macro is wrapped in do { ... } while (0) so that it expands to a
   single statement and is safe as the body of an unbraced if/else. */

/* Append data[left:right] to the end of `list`. */
#define SPLIT_APPEND(data, left, right)                             \
    do {                                                            \
        str = PyString_FromStringAndSize((data) + (left),           \
                                         (right) - (left));         \
        if (str == NULL)                                            \
            goto onError;                                           \
        if (PyList_Append(list, str)) {                             \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        Py_DECREF(str);                                             \
    } while (0)

/* Insert data[left:right] at the front of `list`. */
#define SPLIT_INSERT(data, left, right)                             \
    do {                                                            \
        str = PyString_FromStringAndSize((data) + (left),           \
                                         (right) - (left));         \
        if (str == NULL)                                            \
            goto onError;                                           \
        if (PyList_Insert(list, 0, str)) {                          \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        Py_DECREF(str);                                             \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304
1305static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001306split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001308 int i, j;
1309 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 PyObject *list = PyList_New(0);
1311
1312 if (list == NULL)
1313 return NULL;
1314
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 for (i = j = 0; i < len; ) {
1316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 while (i < len && !isspace(Py_CHARMASK(s[i])))
1320 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 if (maxsplit-- <= 0)
1323 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001325 while (i < len && isspace(Py_CHARMASK(s[i])))
1326 i++;
1327 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328 }
1329 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001331 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001334 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 Py_DECREF(list);
1336 return NULL;
1337}
1338
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001339static PyObject *
1340split_char(const char *s, int len, char ch, int maxcount)
1341{
1342 register int i, j;
1343 PyObject *str;
1344 PyObject *list = PyList_New(0);
1345
1346 if (list == NULL)
1347 return NULL;
1348
1349 for (i = j = 0; i < len; ) {
1350 if (s[i] == ch) {
1351 if (maxcount-- <= 0)
1352 break;
1353 SPLIT_APPEND(s, j, i);
1354 i = j = i + 1;
1355 } else
1356 i++;
1357 }
1358 if (j <= len) {
1359 SPLIT_APPEND(s, j, len);
1360 }
1361 return list;
1362
1363 onError:
1364 Py_DECREF(list);
1365 return NULL;
1366}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001368PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369"S.split([sep [,maxsplit]]) -> list of strings\n\
1370\n\
1371Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001373splits are done. If sep is not specified or is None, any\n\
1374whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375
1376static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001377string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378{
1379 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001380 int maxsplit = -1;
1381 const char *s = PyString_AS_STRING(self), *sub;
1382 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383
Guido van Rossum4c08d552000-03-10 22:55:18 +00001384 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386 if (maxsplit < 0)
1387 maxsplit = INT_MAX;
1388 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 if (PyString_Check(subobj)) {
1391 sub = PyString_AS_STRING(subobj);
1392 n = PyString_GET_SIZE(subobj);
1393 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001394#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001395 else if (PyUnicode_Check(subobj))
1396 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1399 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001400
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 if (n == 0) {
1402 PyErr_SetString(PyExc_ValueError, "empty separator");
1403 return NULL;
1404 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001405 else if (n == 1)
1406 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407
1408 list = PyList_New(0);
1409 if (list == NULL)
1410 return NULL;
1411
1412 i = j = 0;
1413 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001414 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001415 if (maxsplit-- <= 0)
1416 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1418 if (item == NULL)
1419 goto fail;
1420 err = PyList_Append(list, item);
1421 Py_DECREF(item);
1422 if (err < 0)
1423 goto fail;
1424 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 }
1426 else
1427 i++;
1428 }
1429 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1430 if (item == NULL)
1431 goto fail;
1432 err = PyList_Append(list, item);
1433 Py_DECREF(item);
1434 if (err < 0)
1435 goto fail;
1436
1437 return list;
1438
1439 fail:
1440 Py_DECREF(list);
1441 return NULL;
1442}
1443
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001444static PyObject *
1445rsplit_whitespace(const char *s, int len, int maxsplit)
1446{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447 int i, j;
1448 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001449 PyObject *list = PyList_New(0);
1450
1451 if (list == NULL)
1452 return NULL;
1453
1454 for (i = j = len - 1; i >= 0; ) {
1455 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1456 i--;
1457 j = i;
1458 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1459 i--;
1460 if (j > i) {
1461 if (maxsplit-- <= 0)
1462 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001464 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1465 i--;
1466 j = i;
1467 }
1468 }
1469 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001470 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001471 }
1472 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001473 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001474 Py_DECREF(list);
1475 return NULL;
1476}
1477
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001478static PyObject *
1479rsplit_char(const char *s, int len, char ch, int maxcount)
1480{
1481 register int i, j;
1482 PyObject *str;
1483 PyObject *list = PyList_New(0);
1484
1485 if (list == NULL)
1486 return NULL;
1487
1488 for (i = j = len - 1; i >= 0; ) {
1489 if (s[i] == ch) {
1490 if (maxcount-- <= 0)
1491 break;
1492 SPLIT_INSERT(s, i + 1, j + 1);
1493 j = i = i - 1;
1494 } else
1495 i--;
1496 }
1497 if (j >= -1) {
1498 SPLIT_INSERT(s, 0, j + 1);
1499 }
1500 return list;
1501
1502 onError:
1503 Py_DECREF(list);
1504 return NULL;
1505}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001506
1507PyDoc_STRVAR(rsplit__doc__,
1508"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1509\n\
1510Return a list of the words in the string S, using sep as the\n\
1511delimiter string, starting at the end of the string and working\n\
1512to the front. If maxsplit is given, at most maxsplit splits are\n\
1513done. If sep is not specified or is None, any whitespace string\n\
1514is a separator.");
1515
1516static PyObject *
1517string_rsplit(PyStringObject *self, PyObject *args)
1518{
1519 int len = PyString_GET_SIZE(self), n, i, j, err;
1520 int maxsplit = -1;
1521 const char *s = PyString_AS_STRING(self), *sub;
1522 PyObject *list, *item, *subobj = Py_None;
1523
1524 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1525 return NULL;
1526 if (maxsplit < 0)
1527 maxsplit = INT_MAX;
1528 if (subobj == Py_None)
1529 return rsplit_whitespace(s, len, maxsplit);
1530 if (PyString_Check(subobj)) {
1531 sub = PyString_AS_STRING(subobj);
1532 n = PyString_GET_SIZE(subobj);
1533 }
1534#ifdef Py_USING_UNICODE
1535 else if (PyUnicode_Check(subobj))
1536 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1537#endif
1538 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1539 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001540
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001541 if (n == 0) {
1542 PyErr_SetString(PyExc_ValueError, "empty separator");
1543 return NULL;
1544 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001545 else if (n == 1)
1546 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001547
1548 list = PyList_New(0);
1549 if (list == NULL)
1550 return NULL;
1551
1552 j = len;
1553 i = j - n;
1554 while (i >= 0) {
1555 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1556 if (maxsplit-- <= 0)
1557 break;
1558 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1559 if (item == NULL)
1560 goto fail;
1561 err = PyList_Insert(list, 0, item);
1562 Py_DECREF(item);
1563 if (err < 0)
1564 goto fail;
1565 j = i;
1566 i -= n;
1567 }
1568 else
1569 i--;
1570 }
1571 item = PyString_FromStringAndSize(s, j);
1572 if (item == NULL)
1573 goto fail;
1574 err = PyList_Insert(list, 0, item);
1575 Py_DECREF(item);
1576 if (err < 0)
1577 goto fail;
1578
1579 return list;
1580
1581 fail:
1582 Py_DECREF(list);
1583 return NULL;
1584}
1585
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001587PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588"S.join(sequence) -> string\n\
1589\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001590Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592
1593static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001594string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595{
1596 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001597 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 char *p;
1600 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001601 size_t sz = 0;
1602 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001603 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
Tim Peters19fe14e2001-01-19 03:03:47 +00001605 seq = PySequence_Fast(orig, "");
1606 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001607 if (PyErr_ExceptionMatches(PyExc_TypeError))
1608 PyErr_Format(PyExc_TypeError,
1609 "sequence expected, %.80s found",
1610 orig->ob_type->tp_name);
1611 return NULL;
1612 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001613
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001614 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001615 if (seqlen == 0) {
1616 Py_DECREF(seq);
1617 return PyString_FromString("");
1618 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001620 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001621 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1622 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001623 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001624 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001625 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001627
Raymond Hettinger674f2412004-08-23 23:23:54 +00001628 /* There are at least two things to join, or else we have a subclass
1629 * of the builtin types in the sequence.
1630 * Do a pre-pass to figure out the total amount of space we'll
1631 * need (sz), see whether any argument is absurd, and defer to
1632 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001633 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001634 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001635 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001636 item = PySequence_Fast_GET_ITEM(seq, i);
1637 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001638#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001639 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001640 /* Defer to Unicode join.
1641 * CAUTION: There's no gurantee that the
1642 * original sequence can be iterated over
1643 * again, so we must pass seq here.
1644 */
1645 PyObject *result;
1646 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001647 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001648 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001649 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001650#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001651 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001652 "sequence item %i: expected string,"
1653 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001654 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001655 Py_DECREF(seq);
1656 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001657 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001658 sz += PyString_GET_SIZE(item);
1659 if (i != 0)
1660 sz += seplen;
1661 if (sz < old_sz || sz > INT_MAX) {
1662 PyErr_SetString(PyExc_OverflowError,
1663 "join() is too long for a Python string");
1664 Py_DECREF(seq);
1665 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001666 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001667 }
1668
1669 /* Allocate result space. */
1670 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1671 if (res == NULL) {
1672 Py_DECREF(seq);
1673 return NULL;
1674 }
1675
1676 /* Catenate everything. */
1677 p = PyString_AS_STRING(res);
1678 for (i = 0; i < seqlen; ++i) {
1679 size_t n;
1680 item = PySequence_Fast_GET_ITEM(seq, i);
1681 n = PyString_GET_SIZE(item);
1682 memcpy(p, PyString_AS_STRING(item), n);
1683 p += n;
1684 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001685 memcpy(p, sep, seplen);
1686 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001687 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001689
Jeremy Hylton49048292000-07-11 03:28:17 +00001690 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692}
1693
Tim Peters52e155e2001-06-16 05:42:57 +00001694PyObject *
1695_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001696{
Tim Petersa7259592001-06-16 05:11:17 +00001697 assert(sep != NULL && PyString_Check(sep));
1698 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001699 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001700}
1701
/* Normalize a (start, end) pair against a string of length len, in place.
 *
 * end:   values above len become len; negative values are offset by len
 *        and then floored at 0.
 * start: negative values are offset by len and then floored at 0.
 *        Note that start is NOT capped at len.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int lo = *start;
	int hi = *end;

	if (hi > len)
		hi = len;
	else if (hi < 0)
		hi += len;
	if (hi < 0)
		hi = 0;

	if (lo < 0) {
		lo += len;
		if (lo < 0)
			lo = 0;
	}

	*end = hi;
	*start = lo;
}
1716
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717static long
Fred Drakeba096332000-07-09 07:04:36 +00001718string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001720 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721 int len = PyString_GET_SIZE(self);
1722 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001723 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001725 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001726 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001727 return -2;
1728 if (PyString_Check(subobj)) {
1729 sub = PyString_AS_STRING(subobj);
1730 n = PyString_GET_SIZE(subobj);
1731 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001732#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001734 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001735#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001736 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737 return -2;
1738
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001739 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740
Guido van Rossum4c08d552000-03-10 22:55:18 +00001741 if (dir > 0) {
1742 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001744 last -= n;
1745 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001746 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001747 return (long)i;
1748 }
1749 else {
1750 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001751
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 if (n == 0 && i <= last)
1753 return (long)last;
1754 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001755 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 return (long)j;
1757 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001758
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759 return -1;
1760}
1761
1762
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001763PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764"S.find(sub [,start [,end]]) -> int\n\
1765\n\
1766Return the lowest index in S where substring sub is found,\n\
1767such that sub is contained within s[start,end]. Optional\n\
1768arguments start and end are interpreted as in slice notation.\n\
1769\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001770Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771
1772static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001773string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001775 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776 if (result == -2)
1777 return NULL;
1778 return PyInt_FromLong(result);
1779}
1780
1781
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001782PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783"S.index(sub [,start [,end]]) -> int\n\
1784\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001785Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786
1787static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001788string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001790 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791 if (result == -2)
1792 return NULL;
1793 if (result == -1) {
1794 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001795 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796 return NULL;
1797 }
1798 return PyInt_FromLong(result);
1799}
1800
1801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001802PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803"S.rfind(sub [,start [,end]]) -> int\n\
1804\n\
1805Return the highest index in S where substring sub is found,\n\
1806such that sub is contained within s[start,end]. Optional\n\
1807arguments start and end are interpreted as in slice notation.\n\
1808\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001809Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810
1811static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001812string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001814 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815 if (result == -2)
1816 return NULL;
1817 return PyInt_FromLong(result);
1818}
1819
1820
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001821PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822"S.rindex(sub [,start [,end]]) -> int\n\
1823\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001824Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825
1826static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001827string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001829 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 if (result == -2)
1831 return NULL;
1832 if (result == -1) {
1833 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001834 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835 return NULL;
1836 }
1837 return PyInt_FromLong(result);
1838}
1839
1840
1841static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001842do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1843{
1844 char *s = PyString_AS_STRING(self);
1845 int len = PyString_GET_SIZE(self);
1846 char *sep = PyString_AS_STRING(sepobj);
1847 int seplen = PyString_GET_SIZE(sepobj);
1848 int i, j;
1849
1850 i = 0;
1851 if (striptype != RIGHTSTRIP) {
1852 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1853 i++;
1854 }
1855 }
1856
1857 j = len;
1858 if (striptype != LEFTSTRIP) {
1859 do {
1860 j--;
1861 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1862 j++;
1863 }
1864
1865 if (i == 0 && j == len && PyString_CheckExact(self)) {
1866 Py_INCREF(self);
1867 return (PyObject*)self;
1868 }
1869 else
1870 return PyString_FromStringAndSize(s+i, j-i);
1871}
1872
1873
1874static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001875do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876{
1877 char *s = PyString_AS_STRING(self);
1878 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880 i = 0;
1881 if (striptype != RIGHTSTRIP) {
1882 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1883 i++;
1884 }
1885 }
1886
1887 j = len;
1888 if (striptype != LEFTSTRIP) {
1889 do {
1890 j--;
1891 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1892 j++;
1893 }
1894
Tim Peters8fa5dd02001-09-12 02:18:30 +00001895 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896 Py_INCREF(self);
1897 return (PyObject*)self;
1898 }
1899 else
1900 return PyString_FromStringAndSize(s+i, j-i);
1901}
1902
1903
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001904static PyObject *
1905do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1906{
1907 PyObject *sep = NULL;
1908
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001909 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001910 return NULL;
1911
1912 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001913 if (PyString_Check(sep))
1914 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001915#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001916 else if (PyUnicode_Check(sep)) {
1917 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1918 PyObject *res;
1919 if (uniself==NULL)
1920 return NULL;
1921 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1922 striptype, sep);
1923 Py_DECREF(uniself);
1924 return res;
1925 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001926#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001927 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001928 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001929#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001930 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001931#else
1932 "%s arg must be None or str",
1933#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001934 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001935 return NULL;
1936 }
1937 return do_xstrip(self, striptype, sep);
1938 }
1939
1940 return do_strip(self, striptype);
1941}
1942
1943
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001944PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001945"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946\n\
1947Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001948whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001949If chars is given and not None, remove characters in chars instead.\n\
1950If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951
1952static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001953string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001955 if (PyTuple_GET_SIZE(args) == 0)
1956 return do_strip(self, BOTHSTRIP); /* Common case */
1957 else
1958 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959}
1960
1961
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001962PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001963"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001965Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001966If chars is given and not None, remove characters in chars instead.\n\
1967If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968
1969static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001970string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001972 if (PyTuple_GET_SIZE(args) == 0)
1973 return do_strip(self, LEFTSTRIP); /* Common case */
1974 else
1975 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976}
1977
1978
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001979PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001980"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001982Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001983If chars is given and not None, remove characters in chars instead.\n\
1984If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985
1986static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001987string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001989 if (PyTuple_GET_SIZE(args) == 0)
1990 return do_strip(self, RIGHTSTRIP); /* Common case */
1991 else
1992 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993}
1994
1995
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001996PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997"S.lower() -> string\n\
1998\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001999Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000
2001static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002002string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003{
2004 char *s = PyString_AS_STRING(self), *s_new;
2005 int i, n = PyString_GET_SIZE(self);
2006 PyObject *new;
2007
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 new = PyString_FromStringAndSize(NULL, n);
2009 if (new == NULL)
2010 return NULL;
2011 s_new = PyString_AsString(new);
2012 for (i = 0; i < n; i++) {
2013 int c = Py_CHARMASK(*s++);
2014 if (isupper(c)) {
2015 *s_new = tolower(c);
2016 } else
2017 *s_new = c;
2018 s_new++;
2019 }
2020 return new;
2021}
2022
2023
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002024PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025"S.upper() -> string\n\
2026\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002027Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028
2029static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002030string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031{
2032 char *s = PyString_AS_STRING(self), *s_new;
2033 int i, n = PyString_GET_SIZE(self);
2034 PyObject *new;
2035
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036 new = PyString_FromStringAndSize(NULL, n);
2037 if (new == NULL)
2038 return NULL;
2039 s_new = PyString_AsString(new);
2040 for (i = 0; i < n; i++) {
2041 int c = Py_CHARMASK(*s++);
2042 if (islower(c)) {
2043 *s_new = toupper(c);
2044 } else
2045 *s_new = c;
2046 s_new++;
2047 }
2048 return new;
2049}
2050
2051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002052PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002053"S.title() -> string\n\
2054\n\
2055Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057
2058static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002059string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060{
2061 char *s = PyString_AS_STRING(self), *s_new;
2062 int i, n = PyString_GET_SIZE(self);
2063 int previous_is_cased = 0;
2064 PyObject *new;
2065
Guido van Rossum4c08d552000-03-10 22:55:18 +00002066 new = PyString_FromStringAndSize(NULL, n);
2067 if (new == NULL)
2068 return NULL;
2069 s_new = PyString_AsString(new);
2070 for (i = 0; i < n; i++) {
2071 int c = Py_CHARMASK(*s++);
2072 if (islower(c)) {
2073 if (!previous_is_cased)
2074 c = toupper(c);
2075 previous_is_cased = 1;
2076 } else if (isupper(c)) {
2077 if (previous_is_cased)
2078 c = tolower(c);
2079 previous_is_cased = 1;
2080 } else
2081 previous_is_cased = 0;
2082 *s_new++ = c;
2083 }
2084 return new;
2085}
2086
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002087PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088"S.capitalize() -> string\n\
2089\n\
2090Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092
2093static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002094string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095{
2096 char *s = PyString_AS_STRING(self), *s_new;
2097 int i, n = PyString_GET_SIZE(self);
2098 PyObject *new;
2099
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100 new = PyString_FromStringAndSize(NULL, n);
2101 if (new == NULL)
2102 return NULL;
2103 s_new = PyString_AsString(new);
2104 if (0 < n) {
2105 int c = Py_CHARMASK(*s++);
2106 if (islower(c))
2107 *s_new = toupper(c);
2108 else
2109 *s_new = c;
2110 s_new++;
2111 }
2112 for (i = 1; i < n; i++) {
2113 int c = Py_CHARMASK(*s++);
2114 if (isupper(c))
2115 *s_new = tolower(c);
2116 else
2117 *s_new = c;
2118 s_new++;
2119 }
2120 return new;
2121}
2122
2123
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002124PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125"S.count(sub[, start[, end]]) -> int\n\
2126\n\
2127Return the number of occurrences of substring sub in string\n\
2128S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002129interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130
2131static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002132string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 int len = PyString_GET_SIZE(self), n;
2136 int i = 0, last = INT_MAX;
2137 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139
Guido van Rossumc6821402000-05-08 14:08:05 +00002140 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2141 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002143
Guido van Rossum4c08d552000-03-10 22:55:18 +00002144 if (PyString_Check(subobj)) {
2145 sub = PyString_AS_STRING(subobj);
2146 n = PyString_GET_SIZE(subobj);
2147 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002148#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002149 else if (PyUnicode_Check(subobj)) {
2150 int count;
2151 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2152 if (count == -1)
2153 return NULL;
2154 else
2155 return PyInt_FromLong((long) count);
2156 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002157#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2159 return NULL;
2160
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002161 string_adjust_indices(&i, &last, len);
2162
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163 m = last + 1 - n;
2164 if (n == 0)
2165 return PyInt_FromLong((long) (m-i));
2166
2167 r = 0;
2168 while (i < m) {
2169 if (!memcmp(s+i, sub, n)) {
2170 r++;
2171 i += n;
2172 } else {
2173 i++;
2174 }
2175 }
2176 return PyInt_FromLong((long) r);
2177}
2178
2179
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002180PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181"S.swapcase() -> string\n\
2182\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002184converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185
2186static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002187string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188{
2189 char *s = PyString_AS_STRING(self), *s_new;
2190 int i, n = PyString_GET_SIZE(self);
2191 PyObject *new;
2192
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 new = PyString_FromStringAndSize(NULL, n);
2194 if (new == NULL)
2195 return NULL;
2196 s_new = PyString_AsString(new);
2197 for (i = 0; i < n; i++) {
2198 int c = Py_CHARMASK(*s++);
2199 if (islower(c)) {
2200 *s_new = toupper(c);
2201 }
2202 else if (isupper(c)) {
2203 *s_new = tolower(c);
2204 }
2205 else
2206 *s_new = c;
2207 s_new++;
2208 }
2209 return new;
2210}
2211
2212
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214"S.translate(table [,deletechars]) -> string\n\
2215\n\
2216Return a copy of the string S, where all characters occurring\n\
2217in the optional argument deletechars are removed, and the\n\
2218remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002219translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002220
2221static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002222string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224 register char *input, *output;
2225 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226 register int i, c, changed = 0;
2227 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229 int inlen, tablen, dellen = 0;
2230 PyObject *result;
2231 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002234 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002235 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002237
2238 if (PyString_Check(tableobj)) {
2239 table1 = PyString_AS_STRING(tableobj);
2240 tablen = PyString_GET_SIZE(tableobj);
2241 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002242#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002244 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245 parameter; instead a mapping to None will cause characters
2246 to be deleted. */
2247 if (delobj != NULL) {
2248 PyErr_SetString(PyExc_TypeError,
2249 "deletions are implemented differently for unicode");
2250 return NULL;
2251 }
2252 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2253 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002254#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002255 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257
Martin v. Löwis00b61272002-12-12 20:03:19 +00002258 if (tablen != 256) {
2259 PyErr_SetString(PyExc_ValueError,
2260 "translation table must be 256 characters long");
2261 return NULL;
2262 }
2263
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264 if (delobj != NULL) {
2265 if (PyString_Check(delobj)) {
2266 del_table = PyString_AS_STRING(delobj);
2267 dellen = PyString_GET_SIZE(delobj);
2268 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002269#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002270 else if (PyUnicode_Check(delobj)) {
2271 PyErr_SetString(PyExc_TypeError,
2272 "deletions are implemented differently for unicode");
2273 return NULL;
2274 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002275#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2277 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 }
2279 else {
2280 del_table = NULL;
2281 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002282 }
2283
2284 table = table1;
2285 inlen = PyString_Size(input_obj);
2286 result = PyString_FromStringAndSize((char *)NULL, inlen);
2287 if (result == NULL)
2288 return NULL;
2289 output_start = output = PyString_AsString(result);
2290 input = PyString_AsString(input_obj);
2291
2292 if (dellen == 0) {
2293 /* If no deletions are required, use faster code */
2294 for (i = inlen; --i >= 0; ) {
2295 c = Py_CHARMASK(*input++);
2296 if (Py_CHARMASK((*output++ = table[c])) != c)
2297 changed = 1;
2298 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002299 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300 return result;
2301 Py_DECREF(result);
2302 Py_INCREF(input_obj);
2303 return input_obj;
2304 }
2305
2306 for (i = 0; i < 256; i++)
2307 trans_table[i] = Py_CHARMASK(table[i]);
2308
2309 for (i = 0; i < dellen; i++)
2310 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2311
2312 for (i = inlen; --i >= 0; ) {
2313 c = Py_CHARMASK(*input++);
2314 if (trans_table[c] != -1)
2315 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2316 continue;
2317 changed = 1;
2318 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002319 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320 Py_DECREF(result);
2321 Py_INCREF(input_obj);
2322 return input_obj;
2323 }
2324 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002325 if (inlen > 0)
2326 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327 return result;
2328}
2329
2330
2331/* What follows is used for implementing replace(). Perry Stoll. */
2332
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset into MEM of that occurrence, or -1
  if there is none.  When PAT_LEN exceeds LEN the result is always -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot start within the final pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
2358
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right and resuming just past each match.  For example:
      mem=1111  and pat==11 returns 2.
      mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	for (;;) {
		int at;

		if (len < 0)
			break;
		at = mymemfind(mem, len, pat, pat_len);
		if (at == -1)
			break;
		/* Continue immediately after this match. */
		mem += at + pat_len;
		len -= at + pat_len;
		hits++;
	}
	return hits;
}
2382
2383/*
2384 mymemreplace
2385
Thomas Wouters7e474022000-07-16 12:04:32 +00002386 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002387 replaced with SUB.
2388
   If length of PAT is greater than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390 of PAT in STR, then the original string is returned. Otherwise, a new
2391 string is allocated here and returned.
2392
2393 on return, out_len is:
2394 the length of output string, or
2395 -1 if the input string is returned, or
2396 unchanged if an error occurs (no memory).
2397
2398 return value is:
2399 the new string allocated locally, or
2400 NULL if an error occurred.
2401*/
2402static char *
Tim Petersc2e7da92000-07-09 08:02:21 +00002403mymemreplace(const char *str, int len, /* input string */
2404 const char *pat, int pat_len, /* pattern string to find */
2405 const char *sub, int sub_len, /* substitution string */
2406 int count, /* number of replacements */
Tim Peters4cd44ef2001-05-10 00:05:33 +00002407 int *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408{
2409 char *out_s;
2410 char *new_s;
2411 int nfound, offset, new_len;
2412
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002413 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414 goto return_same;
2415
2416 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002417 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002418 if (count < 0)
2419 count = INT_MAX;
2420 else if (nfound > count)
2421 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 if (nfound == 0)
2423 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002424
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002426 if (new_len == 0) {
2427 /* Have to allocate something for the caller to free(). */
2428 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002429 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002430 return NULL;
2431 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002433 else {
2434 assert(new_len > 0);
2435 new_s = (char *)PyMem_MALLOC(new_len);
2436 if (new_s == NULL)
2437 return NULL;
2438 out_s = new_s;
2439
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002440 if (pat_len > 0) {
2441 for (; nfound > 0; --nfound) {
2442 /* find index of next instance of pattern */
2443 offset = mymemfind(str, len, pat, pat_len);
2444 if (offset == -1)
2445 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002446
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002447 /* copy non matching part of input string */
2448 memcpy(new_s, str, offset);
2449 str += offset + pat_len;
2450 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002451
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002452 /* copy substitute into the output string */
2453 new_s += offset;
2454 memcpy(new_s, sub, sub_len);
2455 new_s += sub_len;
2456 }
2457 /* copy any remaining values into output string */
2458 if (len > 0)
2459 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002460 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002461 else {
2462 for (;;++str, --len) {
2463 memcpy(new_s, sub, sub_len);
2464 new_s += sub_len;
2465 if (--nfound <= 0) {
2466 memcpy(new_s, str, len);
2467 break;
2468 }
2469 *new_s++ = *str;
2470 }
2471 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002472 }
2473 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002474 return out_s;
2475
2476 return_same:
2477 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002478 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002479}
2480
2481
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002482PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002483"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002484\n\
2485Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002486old replaced by new. If the optional argument count is\n\
2487given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488
2489static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002490string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002491{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002492 const char *str = PyString_AS_STRING(self), *sub, *repl;
2493 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002494 const int len = PyString_GET_SIZE(self);
2495 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002497 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002498 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002499
Guido van Rossum4c08d552000-03-10 22:55:18 +00002500 if (!PyArg_ParseTuple(args, "OO|i:replace",
2501 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002502 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002503
2504 if (PyString_Check(subobj)) {
2505 sub = PyString_AS_STRING(subobj);
2506 sub_len = PyString_GET_SIZE(subobj);
2507 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002508#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002510 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002511 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002512#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2514 return NULL;
2515
2516 if (PyString_Check(replobj)) {
2517 repl = PyString_AS_STRING(replobj);
2518 repl_len = PyString_GET_SIZE(replobj);
2519 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002520#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002521 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002522 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002523 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002524#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2526 return NULL;
2527
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529 if (new_s == NULL) {
2530 PyErr_NoMemory();
2531 return NULL;
2532 }
2533 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002534 if (PyString_CheckExact(self)) {
2535 /* we're returning another reference to self */
2536 new = (PyObject*)self;
2537 Py_INCREF(new);
2538 }
2539 else {
2540 new = PyString_FromStringAndSize(str, len);
2541 if (new == NULL)
2542 return NULL;
2543 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002544 }
2545 else {
2546 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002547 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002548 }
2549 return new;
2550}
2551
2552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002553PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002554"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002555\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002556Return True if S starts with the specified prefix, False otherwise.\n\
2557With optional start, test S beginning at that position.\n\
2558With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002559
2560static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002561string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002562{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002564 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566 int plen;
2567 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002568 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570
Guido van Rossumc6821402000-05-08 14:08:05 +00002571 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2572 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573 return NULL;
2574 if (PyString_Check(subobj)) {
2575 prefix = PyString_AS_STRING(subobj);
2576 plen = PyString_GET_SIZE(subobj);
2577 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002578#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002579 else if (PyUnicode_Check(subobj)) {
2580 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002581 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002582 subobj, start, end, -1);
2583 if (rc == -1)
2584 return NULL;
2585 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002586 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002587 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002588#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002589 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002590 return NULL;
2591
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002592 string_adjust_indices(&start, &end, len);
2593
2594 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002595 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002596
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002597 if (end-start >= plen)
2598 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2599 else
2600 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002601}
2602
2603
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002604PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002605"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002606\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002607Return True if S ends with the specified suffix, False otherwise.\n\
2608With optional start, test S beginning at that position.\n\
2609With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610
2611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002612string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002613{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002615 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002616 const char* suffix;
2617 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002619 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002621
Guido van Rossumc6821402000-05-08 14:08:05 +00002622 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2623 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 return NULL;
2625 if (PyString_Check(subobj)) {
2626 suffix = PyString_AS_STRING(subobj);
2627 slen = PyString_GET_SIZE(subobj);
2628 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002629#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002630 else if (PyUnicode_Check(subobj)) {
2631 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002632 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002633 subobj, start, end, +1);
2634 if (rc == -1)
2635 return NULL;
2636 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002637 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002638 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002639#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002640 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002641 return NULL;
2642
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002643 string_adjust_indices(&start, &end, len);
2644
2645 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002646 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002647
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002648 if (end-slen > start)
2649 start = end - slen;
2650 if (end-start >= slen)
2651 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2652 else
2653 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002654}
2655
2656
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002657PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002658"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002659\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002660Encodes S using the codec registered for encoding. encoding defaults\n\
2661to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002662handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002663a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2664'xmlcharrefreplace' as well as any other name registered with\n\
2665codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002666
2667static PyObject *
2668string_encode(PyStringObject *self, PyObject *args)
2669{
2670 char *encoding = NULL;
2671 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002672 PyObject *v;
2673
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002674 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2675 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002676 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002677 if (v == NULL)
2678 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002679 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2680 PyErr_Format(PyExc_TypeError,
2681 "encoder did not return a string/unicode object "
2682 "(type=%.400s)",
2683 v->ob_type->tp_name);
2684 Py_DECREF(v);
2685 return NULL;
2686 }
2687 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002688
2689 onError:
2690 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002691}
2692
2693
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002694PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002695"S.decode([encoding[,errors]]) -> object\n\
2696\n\
2697Decodes S using the codec registered for encoding. encoding defaults\n\
2698to the default encoding. errors may be given to set a different error\n\
2699handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002700a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2701as well as any other name registerd with codecs.register_error that is\n\
2702able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002703
2704static PyObject *
2705string_decode(PyStringObject *self, PyObject *args)
2706{
2707 char *encoding = NULL;
2708 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002709 PyObject *v;
2710
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002711 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2712 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002713 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002714 if (v == NULL)
2715 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002716 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2717 PyErr_Format(PyExc_TypeError,
2718 "decoder did not return a string/unicode object "
2719 "(type=%.400s)",
2720 v->ob_type->tp_name);
2721 Py_DECREF(v);
2722 return NULL;
2723 }
2724 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002725
2726 onError:
2727 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002728}
2729
2730
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002731PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002732"S.expandtabs([tabsize]) -> string\n\
2733\n\
2734Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002736
2737static PyObject*
2738string_expandtabs(PyStringObject *self, PyObject *args)
2739{
2740 const char *e, *p;
2741 char *q;
2742 int i, j;
2743 PyObject *u;
2744 int tabsize = 8;
2745
2746 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2747 return NULL;
2748
Thomas Wouters7e474022000-07-16 12:04:32 +00002749 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002750 i = j = 0;
2751 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2752 for (p = PyString_AS_STRING(self); p < e; p++)
2753 if (*p == '\t') {
2754 if (tabsize > 0)
2755 j += tabsize - (j % tabsize);
2756 }
2757 else {
2758 j++;
2759 if (*p == '\n' || *p == '\r') {
2760 i += j;
2761 j = 0;
2762 }
2763 }
2764
2765 /* Second pass: create output string and fill it */
2766 u = PyString_FromStringAndSize(NULL, i + j);
2767 if (!u)
2768 return NULL;
2769
2770 j = 0;
2771 q = PyString_AS_STRING(u);
2772
2773 for (p = PyString_AS_STRING(self); p < e; p++)
2774 if (*p == '\t') {
2775 if (tabsize > 0) {
2776 i = tabsize - (j % tabsize);
2777 j += i;
2778 while (i--)
2779 *q++ = ' ';
2780 }
2781 }
2782 else {
2783 j++;
2784 *q++ = *p;
2785 if (*p == '\n' || *p == '\r')
2786 j = 0;
2787 }
2788
2789 return u;
2790}
2791
Tim Peters8fa5dd02001-09-12 02:18:30 +00002792static PyObject *
2793pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002794{
2795 PyObject *u;
2796
2797 if (left < 0)
2798 left = 0;
2799 if (right < 0)
2800 right = 0;
2801
Tim Peters8fa5dd02001-09-12 02:18:30 +00002802 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002803 Py_INCREF(self);
2804 return (PyObject *)self;
2805 }
2806
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002807 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002808 left + PyString_GET_SIZE(self) + right);
2809 if (u) {
2810 if (left)
2811 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002812 memcpy(PyString_AS_STRING(u) + left,
2813 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002814 PyString_GET_SIZE(self));
2815 if (right)
2816 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2817 fill, right);
2818 }
2819
2820 return u;
2821}
2822
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002823PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002824"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002825"\n"
2826"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002827"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002828
2829static PyObject *
2830string_ljust(PyStringObject *self, PyObject *args)
2831{
2832 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002833 char fillchar = ' ';
2834
2835 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836 return NULL;
2837
Tim Peters8fa5dd02001-09-12 02:18:30 +00002838 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002839 Py_INCREF(self);
2840 return (PyObject*) self;
2841 }
2842
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002843 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002844}
2845
2846
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002847PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002848"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002849"\n"
2850"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002851"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002852
2853static PyObject *
2854string_rjust(PyStringObject *self, PyObject *args)
2855{
2856 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002857 char fillchar = ' ';
2858
2859 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002860 return NULL;
2861
Tim Peters8fa5dd02001-09-12 02:18:30 +00002862 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002863 Py_INCREF(self);
2864 return (PyObject*) self;
2865 }
2866
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002867 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002868}
2869
2870
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002871PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002872"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002873"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002874"Return S centered in a string of length width. Padding is\n"
2875"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876
2877static PyObject *
2878string_center(PyStringObject *self, PyObject *args)
2879{
2880 int marg, left;
2881 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002882 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002883
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002884 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885 return NULL;
2886
Tim Peters8fa5dd02001-09-12 02:18:30 +00002887 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002888 Py_INCREF(self);
2889 return (PyObject*) self;
2890 }
2891
2892 marg = width - PyString_GET_SIZE(self);
2893 left = marg / 2 + (marg & width & 1);
2894
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002895 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002896}
2897
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002898PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002899"S.zfill(width) -> string\n"
2900"\n"
2901"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002902"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002903
2904static PyObject *
2905string_zfill(PyStringObject *self, PyObject *args)
2906{
2907 int fill;
2908 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002909 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002910
2911 int width;
2912 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2913 return NULL;
2914
2915 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002916 if (PyString_CheckExact(self)) {
2917 Py_INCREF(self);
2918 return (PyObject*) self;
2919 }
2920 else
2921 return PyString_FromStringAndSize(
2922 PyString_AS_STRING(self),
2923 PyString_GET_SIZE(self)
2924 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002925 }
2926
2927 fill = width - PyString_GET_SIZE(self);
2928
2929 s = pad(self, fill, 0, '0');
2930
2931 if (s == NULL)
2932 return NULL;
2933
2934 p = PyString_AS_STRING(s);
2935 if (p[fill] == '+' || p[fill] == '-') {
2936 /* move sign to beginning of string */
2937 p[0] = p[fill];
2938 p[fill] = '0';
2939 }
2940
2941 return (PyObject*) s;
2942}
2943
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002944PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002945"S.isspace() -> bool\n\
2946\n\
2947Return True if all characters in S are whitespace\n\
2948and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002949
2950static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002951string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002952{
Fred Drakeba096332000-07-09 07:04:36 +00002953 register const unsigned char *p
2954 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002955 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002956
Guido van Rossum4c08d552000-03-10 22:55:18 +00002957 /* Shortcut for single character strings */
2958 if (PyString_GET_SIZE(self) == 1 &&
2959 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002960 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002962 /* Special case for empty strings */
2963 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002964 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002965
Guido van Rossum4c08d552000-03-10 22:55:18 +00002966 e = p + PyString_GET_SIZE(self);
2967 for (; p < e; p++) {
2968 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002969 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002970 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002971 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002972}
2973
2974
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002975PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002976"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002977\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002978Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002979and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002980
2981static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002982string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002983{
Fred Drakeba096332000-07-09 07:04:36 +00002984 register const unsigned char *p
2985 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002986 register const unsigned char *e;
2987
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002988 /* Shortcut for single character strings */
2989 if (PyString_GET_SIZE(self) == 1 &&
2990 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002991 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002992
2993 /* Special case for empty strings */
2994 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002995 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002996
2997 e = p + PyString_GET_SIZE(self);
2998 for (; p < e; p++) {
2999 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003000 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003001 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003002 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003003}
3004
3005
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003006PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003007"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003008\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003009Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003010and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003011
3012static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003013string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003014{
Fred Drakeba096332000-07-09 07:04:36 +00003015 register const unsigned char *p
3016 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003017 register const unsigned char *e;
3018
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003019 /* Shortcut for single character strings */
3020 if (PyString_GET_SIZE(self) == 1 &&
3021 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003022 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003023
3024 /* Special case for empty strings */
3025 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003026 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003027
3028 e = p + PyString_GET_SIZE(self);
3029 for (; p < e; p++) {
3030 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003031 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003032 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003033 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003034}
3035
3036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003037PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003038"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003039\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003040Return True if all characters in S are digits\n\
3041and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003042
3043static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003044string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003045{
Fred Drakeba096332000-07-09 07:04:36 +00003046 register const unsigned char *p
3047 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003048 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003049
Guido van Rossum4c08d552000-03-10 22:55:18 +00003050 /* Shortcut for single character strings */
3051 if (PyString_GET_SIZE(self) == 1 &&
3052 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003053 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003054
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003055 /* Special case for empty strings */
3056 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003057 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003058
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059 e = p + PyString_GET_SIZE(self);
3060 for (; p < e; p++) {
3061 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003062 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003064 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003065}
3066
3067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003068PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003069"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003071Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003072at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003073
3074static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003075string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076{
Fred Drakeba096332000-07-09 07:04:36 +00003077 register const unsigned char *p
3078 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003079 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 int cased;
3081
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 /* Shortcut for single character strings */
3083 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003084 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003085
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003086 /* Special case for empty strings */
3087 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003088 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003089
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 e = p + PyString_GET_SIZE(self);
3091 cased = 0;
3092 for (; p < e; p++) {
3093 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003094 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095 else if (!cased && islower(*p))
3096 cased = 1;
3097 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003098 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003099}
3100
3101
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003102PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003103"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003104\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003105Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003106at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003107
3108static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003109string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110{
Fred Drakeba096332000-07-09 07:04:36 +00003111 register const unsigned char *p
3112 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003113 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114 int cased;
3115
Guido van Rossum4c08d552000-03-10 22:55:18 +00003116 /* Shortcut for single character strings */
3117 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003118 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003119
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003120 /* Special case for empty strings */
3121 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003122 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003123
Guido van Rossum4c08d552000-03-10 22:55:18 +00003124 e = p + PyString_GET_SIZE(self);
3125 cased = 0;
3126 for (; p < e; p++) {
3127 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003128 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003129 else if (!cased && isupper(*p))
3130 cased = 1;
3131 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003132 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133}
3134
3135
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003136PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003137"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003138\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003139Return True if S is a titlecased string and there is at least one\n\
3140character in S, i.e. uppercase characters may only follow uncased\n\
3141characters and lowercase characters only cased ones. Return False\n\
3142otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143
3144static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003145string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146{
Fred Drakeba096332000-07-09 07:04:36 +00003147 register const unsigned char *p
3148 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003149 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 int cased, previous_is_cased;
3151
Guido van Rossum4c08d552000-03-10 22:55:18 +00003152 /* Shortcut for single character strings */
3153 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003154 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003156 /* Special case for empty strings */
3157 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003158 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003159
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160 e = p + PyString_GET_SIZE(self);
3161 cased = 0;
3162 previous_is_cased = 0;
3163 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003164 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003165
3166 if (isupper(ch)) {
3167 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003168 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 previous_is_cased = 1;
3170 cased = 1;
3171 }
3172 else if (islower(ch)) {
3173 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003174 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003175 previous_is_cased = 1;
3176 cased = 1;
3177 }
3178 else
3179 previous_is_cased = 0;
3180 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003181 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182}
3183
3184
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003185PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003186"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003187\n\
3188Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003189Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003190is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191
Guido van Rossum4c08d552000-03-10 22:55:18 +00003192static PyObject*
3193string_splitlines(PyStringObject *self, PyObject *args)
3194{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195 register int i;
3196 register int j;
3197 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003198 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199 PyObject *list;
3200 PyObject *str;
3201 char *data;
3202
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003203 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003204 return NULL;
3205
3206 data = PyString_AS_STRING(self);
3207 len = PyString_GET_SIZE(self);
3208
Guido van Rossum4c08d552000-03-10 22:55:18 +00003209 list = PyList_New(0);
3210 if (!list)
3211 goto onError;
3212
3213 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003214 int eol;
3215
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 /* Find a line and append it */
3217 while (i < len && data[i] != '\n' && data[i] != '\r')
3218 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003219
3220 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003221 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003222 if (i < len) {
3223 if (data[i] == '\r' && i + 1 < len &&
3224 data[i+1] == '\n')
3225 i += 2;
3226 else
3227 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003228 if (keepends)
3229 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003230 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003231 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003232 j = i;
3233 }
3234 if (j < len) {
3235 SPLIT_APPEND(data, j, len);
3236 }
3237
3238 return list;
3239
3240 onError:
3241 Py_DECREF(list);
3242 return NULL;
3243}
3244
3245#undef SPLIT_APPEND
3246
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003247static PyObject *
3248string_getnewargs(PyStringObject *v)
3249{
3250 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3251}
3252
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003253
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003254static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003255string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003256 /* Counterparts of the obsolete stropmodule functions; except
3257 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003258 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3259 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003260 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003261 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3262 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003263 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3264 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3265 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3266 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3267 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3268 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3269 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003270 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3271 capitalize__doc__},
3272 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3273 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3274 endswith__doc__},
3275 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3276 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3277 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3278 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3279 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3280 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3281 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3282 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3283 startswith__doc__},
3284 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3285 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3286 swapcase__doc__},
3287 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3288 translate__doc__},
3289 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3290 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3291 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3292 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3293 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3294 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3295 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3296 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3297 expandtabs__doc__},
3298 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3299 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003300 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003301 {NULL, NULL} /* sentinel */
3302};
3303
Jeremy Hylton938ace62002-07-17 16:30:39 +00003304static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003305str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3306
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003307static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003308string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003309{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003310 PyObject *x = NULL;
3311 static char *kwlist[] = {"object", 0};
3312
Guido van Rossumae960af2001-08-30 03:11:59 +00003313 if (type != &PyString_Type)
3314 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003315 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3316 return NULL;
3317 if (x == NULL)
3318 return PyString_FromString("");
3319 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003320}
3321
Guido van Rossumae960af2001-08-30 03:11:59 +00003322static PyObject *
3323str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3324{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003325 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003326 int n;
3327
3328 assert(PyType_IsSubtype(type, &PyString_Type));
3329 tmp = string_new(&PyString_Type, args, kwds);
3330 if (tmp == NULL)
3331 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003332 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003333 n = PyString_GET_SIZE(tmp);
3334 pnew = type->tp_alloc(type, n);
3335 if (pnew != NULL) {
3336 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003337 ((PyStringObject *)pnew)->ob_shash =
3338 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003339 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003340 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003341 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003342 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003343}
3344
Guido van Rossumcacfc072002-05-24 19:01:59 +00003345static PyObject *
3346basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3347{
3348 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003349 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003350 return NULL;
3351}
3352
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003353static PyObject *
3354string_mod(PyObject *v, PyObject *w)
3355{
3356 if (!PyString_Check(v)) {
3357 Py_INCREF(Py_NotImplemented);
3358 return Py_NotImplemented;
3359 }
3360 return PyString_Format(v, w);
3361}
3362
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003363PyDoc_STRVAR(basestring_doc,
3364"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003365
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003366static PyNumberMethods string_as_number = {
3367 0, /*nb_add*/
3368 0, /*nb_subtract*/
3369 0, /*nb_multiply*/
3370 0, /*nb_divide*/
3371 string_mod, /*nb_remainder*/
3372};
3373
3374
Guido van Rossumcacfc072002-05-24 19:01:59 +00003375PyTypeObject PyBaseString_Type = {
3376 PyObject_HEAD_INIT(&PyType_Type)
3377 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003378 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003379 0,
3380 0,
3381 0, /* tp_dealloc */
3382 0, /* tp_print */
3383 0, /* tp_getattr */
3384 0, /* tp_setattr */
3385 0, /* tp_compare */
3386 0, /* tp_repr */
3387 0, /* tp_as_number */
3388 0, /* tp_as_sequence */
3389 0, /* tp_as_mapping */
3390 0, /* tp_hash */
3391 0, /* tp_call */
3392 0, /* tp_str */
3393 0, /* tp_getattro */
3394 0, /* tp_setattro */
3395 0, /* tp_as_buffer */
3396 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3397 basestring_doc, /* tp_doc */
3398 0, /* tp_traverse */
3399 0, /* tp_clear */
3400 0, /* tp_richcompare */
3401 0, /* tp_weaklistoffset */
3402 0, /* tp_iter */
3403 0, /* tp_iternext */
3404 0, /* tp_methods */
3405 0, /* tp_members */
3406 0, /* tp_getset */
3407 &PyBaseObject_Type, /* tp_base */
3408 0, /* tp_dict */
3409 0, /* tp_descr_get */
3410 0, /* tp_descr_set */
3411 0, /* tp_dictoffset */
3412 0, /* tp_init */
3413 0, /* tp_alloc */
3414 basestring_new, /* tp_new */
3415 0, /* tp_free */
3416};
3417
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003418PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003419"str(object) -> string\n\
3420\n\
3421Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003422If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003423
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003424PyTypeObject PyString_Type = {
3425 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003426 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003427 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003428 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003429 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003430 (destructor)string_dealloc, /* tp_dealloc */
3431 (printfunc)string_print, /* tp_print */
3432 0, /* tp_getattr */
3433 0, /* tp_setattr */
3434 0, /* tp_compare */
3435 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003436 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003437 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003438 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003439 (hashfunc)string_hash, /* tp_hash */
3440 0, /* tp_call */
3441 (reprfunc)string_str, /* tp_str */
3442 PyObject_GenericGetAttr, /* tp_getattro */
3443 0, /* tp_setattro */
3444 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003445 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3446 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003447 string_doc, /* tp_doc */
3448 0, /* tp_traverse */
3449 0, /* tp_clear */
3450 (richcmpfunc)string_richcompare, /* tp_richcompare */
3451 0, /* tp_weaklistoffset */
3452 0, /* tp_iter */
3453 0, /* tp_iternext */
3454 string_methods, /* tp_methods */
3455 0, /* tp_members */
3456 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003457 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003458 0, /* tp_dict */
3459 0, /* tp_descr_get */
3460 0, /* tp_descr_set */
3461 0, /* tp_dictoffset */
3462 0, /* tp_init */
3463 0, /* tp_alloc */
3464 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003465 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003466};
3467
3468void
Fred Drakeba096332000-07-09 07:04:36 +00003469PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003470{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003471 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003472 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003473 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003474 if (w == NULL || !PyString_Check(*pv)) {
3475 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003476 *pv = NULL;
3477 return;
3478 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003479 v = string_concat((PyStringObject *) *pv, w);
3480 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003481 *pv = v;
3482}
3483
Guido van Rossum013142a1994-08-30 08:19:36 +00003484void
Fred Drakeba096332000-07-09 07:04:36 +00003485PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003486{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003487 PyString_Concat(pv, w);
3488 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003489}
3490
3491
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003492/* The following function breaks the notion that strings are immutable:
3493 it changes the size of a string. We get away with this only if there
3494 is only one module referencing the object. You can also think of it
3495 as creating a new string object and destroying the old one, only
3496 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003497 already be known to some other part of the code...
3498 Note that if there's not enough memory to resize the string, the original
3499 string object at *pv is deallocated, *pv is set to NULL, an "out of
3500 memory" exception is set, and -1 is returned. Else (on success) 0 is
3501 returned, and the value in *pv may or may not be the same as on input.
3502 As always, an extra byte is allocated for a trailing \0 byte (newsize
3503 does *not* include that), and a trailing \0 byte is stored.
3504*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003505
3506int
Fred Drakeba096332000-07-09 07:04:36 +00003507_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003508{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509 register PyObject *v;
3510 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003511 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003512 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3513 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003514 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003515 Py_DECREF(v);
3516 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003517 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003518 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003519 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003520 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003521 _Py_ForgetReference(v);
3522 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003523 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003524 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003525 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003526 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003527 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003528 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003529 _Py_NewReference(*pv);
3530 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003531 sv->ob_size = newsize;
3532 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003533 return 0;
3534}
Guido van Rossume5372401993-03-16 12:15:04 +00003535
3536/* Helpers for formatstring */
3537
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003538static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003539getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003540{
3541 int argidx = *p_argidx;
3542 if (argidx < arglen) {
3543 (*p_argidx)++;
3544 if (arglen < 0)
3545 return args;
3546 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003547 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003548 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003549 PyErr_SetString(PyExc_TypeError,
3550 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003551 return NULL;
3552}
3553
Tim Peters38fd5b62000-09-21 05:43:11 +00003554/* Format codes
3555 * F_LJUST '-'
3556 * F_SIGN '+'
3557 * F_BLANK ' '
3558 * F_ALT '#'
3559 * F_ZERO '0'
3560 */
Guido van Rossume5372401993-03-16 12:15:04 +00003561#define F_LJUST (1<<0)
3562#define F_SIGN (1<<1)
3563#define F_BLANK (1<<2)
3564#define F_ALT (1<<3)
3565#define F_ZERO (1<<4)
3566
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003567static int
Fred Drakeba096332000-07-09 07:04:36 +00003568formatfloat(char *buf, size_t buflen, int flags,
3569 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003570{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003571 /* fmt = '%#.' + `prec` + `type`
3572 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003573 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003574 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003575 x = PyFloat_AsDouble(v);
3576 if (x == -1.0 && PyErr_Occurred()) {
3577 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003578 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003579 }
Guido van Rossume5372401993-03-16 12:15:04 +00003580 if (prec < 0)
3581 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003582 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3583 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003584 /* Worst case length calc to ensure no buffer overrun:
3585
3586 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003587 fmt = %#.<prec>g
3588 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003589 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003590 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003591
3592 'f' formats:
3593 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3594 len = 1 + 50 + 1 + prec = 52 + prec
3595
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003596 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003597 always given), therefore increase the length by one.
3598
3599 */
3600 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3601 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003602 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003603 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003604 return -1;
3605 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003606 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3607 (flags&F_ALT) ? "#" : "",
3608 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003609 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003610 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003611}
3612
Tim Peters38fd5b62000-09-21 05:43:11 +00003613/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3614 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3615 * Python's regular ints.
3616 * Return value: a new PyString*, or NULL if error.
3617 * . *pbuf is set to point into it,
3618 * *plen set to the # of chars following that.
3619 * Caller must decref it when done using pbuf.
3620 * The string starting at *pbuf is of the form
3621 * "-"? ("0x" | "0X")? digit+
3622 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003623 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003624 * There will be at least prec digits, zero-filled on the left if
3625 * necessary to get that many.
3626 * val object to be converted
3627 * flags bitmask of format flags; only F_ALT is looked at
3628 * prec minimum number of digits; 0-fill on left if needed
3629 * type a character in [duoxX]; u acts the same as d
3630 *
3631 * CAUTION: o, x and X conversions on regular ints can never
3632 * produce a '-' sign, but can for Python's unbounded ints.
3633 */
3634PyObject*
3635_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3636 char **pbuf, int *plen)
3637{
3638 PyObject *result = NULL;
3639 char *buf;
3640 int i;
3641 int sign; /* 1 if '-', else 0 */
3642 int len; /* number of characters */
3643 int numdigits; /* len == numnondigits + numdigits */
3644 int numnondigits = 0;
3645
3646 switch (type) {
3647 case 'd':
3648 case 'u':
3649 result = val->ob_type->tp_str(val);
3650 break;
3651 case 'o':
3652 result = val->ob_type->tp_as_number->nb_oct(val);
3653 break;
3654 case 'x':
3655 case 'X':
3656 numnondigits = 2;
3657 result = val->ob_type->tp_as_number->nb_hex(val);
3658 break;
3659 default:
3660 assert(!"'type' not in [duoxX]");
3661 }
3662 if (!result)
3663 return NULL;
3664
3665 /* To modify the string in-place, there can only be one reference. */
3666 if (result->ob_refcnt != 1) {
3667 PyErr_BadInternalCall();
3668 return NULL;
3669 }
3670 buf = PyString_AsString(result);
3671 len = PyString_Size(result);
3672 if (buf[len-1] == 'L') {
3673 --len;
3674 buf[len] = '\0';
3675 }
3676 sign = buf[0] == '-';
3677 numnondigits += sign;
3678 numdigits = len - numnondigits;
3679 assert(numdigits > 0);
3680
Tim Petersfff53252001-04-12 18:38:48 +00003681 /* Get rid of base marker unless F_ALT */
3682 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003683 /* Need to skip 0x, 0X or 0. */
3684 int skipped = 0;
3685 switch (type) {
3686 case 'o':
3687 assert(buf[sign] == '0');
3688 /* If 0 is only digit, leave it alone. */
3689 if (numdigits > 1) {
3690 skipped = 1;
3691 --numdigits;
3692 }
3693 break;
3694 case 'x':
3695 case 'X':
3696 assert(buf[sign] == '0');
3697 assert(buf[sign + 1] == 'x');
3698 skipped = 2;
3699 numnondigits -= 2;
3700 break;
3701 }
3702 if (skipped) {
3703 buf += skipped;
3704 len -= skipped;
3705 if (sign)
3706 buf[0] = '-';
3707 }
3708 assert(len == numnondigits + numdigits);
3709 assert(numdigits > 0);
3710 }
3711
3712 /* Fill with leading zeroes to meet minimum width. */
3713 if (prec > numdigits) {
3714 PyObject *r1 = PyString_FromStringAndSize(NULL,
3715 numnondigits + prec);
3716 char *b1;
3717 if (!r1) {
3718 Py_DECREF(result);
3719 return NULL;
3720 }
3721 b1 = PyString_AS_STRING(r1);
3722 for (i = 0; i < numnondigits; ++i)
3723 *b1++ = *buf++;
3724 for (i = 0; i < prec - numdigits; i++)
3725 *b1++ = '0';
3726 for (i = 0; i < numdigits; i++)
3727 *b1++ = *buf++;
3728 *b1 = '\0';
3729 Py_DECREF(result);
3730 result = r1;
3731 buf = PyString_AS_STRING(result);
3732 len = numnondigits + prec;
3733 }
3734
3735 /* Fix up case for hex conversions. */
3736 switch (type) {
3737 case 'x':
3738 /* Need to convert all upper case letters to lower case. */
3739 for (i = 0; i < len; i++)
3740 if (buf[i] >= 'A' && buf[i] <= 'F')
3741 buf[i] += 'a'-'A';
3742 break;
3743 case 'X':
3744 /* Need to convert 0x to 0X (and -0x to -0X). */
3745 if (buf[sign + 1] == 'x')
3746 buf[sign + 1] = 'X';
3747 break;
3748 }
3749 *pbuf = buf;
3750 *plen = len;
3751 return result;
3752}
3753
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003754static int
Fred Drakeba096332000-07-09 07:04:36 +00003755formatint(char *buf, size_t buflen, int flags,
3756 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003757{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003758 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003759 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3760 + 1 + 1 = 24 */
3761 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003762 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003763 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003764
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003765 x = PyInt_AsLong(v);
3766 if (x == -1 && PyErr_Occurred()) {
3767 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003768 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003769 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003770 if (x < 0 && type == 'u') {
3771 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003772 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003773 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3774 sign = "-";
3775 else
3776 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003777 if (prec < 0)
3778 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003779
3780 if ((flags & F_ALT) &&
3781 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003782 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003783 * of issues that cause pain:
3784 * - when 0 is being converted, the C standard leaves off
3785 * the '0x' or '0X', which is inconsistent with other
3786 * %#x/%#X conversions and inconsistent with Python's
3787 * hex() function
3788 * - there are platforms that violate the standard and
3789 * convert 0 with the '0x' or '0X'
3790 * (Metrowerks, Compaq Tru64)
3791 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003792 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003793 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003794 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003795 * We can achieve the desired consistency by inserting our
3796 * own '0x' or '0X' prefix, and substituting %x/%X in place
3797 * of %#x/%#X.
3798 *
3799 * Note that this is the same approach as used in
3800 * formatint() in unicodeobject.c
3801 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003802 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3803 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003804 }
3805 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003806 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3807 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003808 prec, type);
3809 }
3810
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003811 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3812 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003813 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003814 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003815 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003816 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003817 return -1;
3818 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003819 if (sign[0])
3820 PyOS_snprintf(buf, buflen, fmt, -x);
3821 else
3822 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003823 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003824}
3825
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003826static int
Fred Drakeba096332000-07-09 07:04:36 +00003827formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003828{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003829 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003830 if (PyString_Check(v)) {
3831 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003832 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003833 }
3834 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003835 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003836 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003837 }
3838 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003839 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003840}
3841
Guido van Rossum013142a1994-08-30 08:19:36 +00003842
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size, in bytes, of the scratch buffer into which
   floats, ints and chars are formatted before being copied to the result.
   XXX This is a magic number.  Each formatting routine performs its own
   bounds checking so the buffer cannot overflow, but allocating a buffer
   of the appropriate size for each conversion might be a better solution.
   For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003852
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003853PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003854PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003855{
3856 char *fmt, *res;
3857 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003858 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003859 PyObject *result, *orig_args;
3860#ifdef Py_USING_UNICODE
3861 PyObject *v, *w;
3862#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003863 PyObject *dict = NULL;
3864 if (format == NULL || !PyString_Check(format) || args == NULL) {
3865 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003866 return NULL;
3867 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003868 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003869 fmt = PyString_AS_STRING(format);
3870 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003871 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003872 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003873 if (result == NULL)
3874 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003875 res = PyString_AsString(result);
3876 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003877 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003878 argidx = 0;
3879 }
3880 else {
3881 arglen = -1;
3882 argidx = -2;
3883 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003884 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3885 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003886 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003887 while (--fmtcnt >= 0) {
3888 if (*fmt != '%') {
3889 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003890 rescnt = fmtcnt + 100;
3891 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003892 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003893 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003894 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003895 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003896 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003897 }
3898 *res++ = *fmt++;
3899 }
3900 else {
3901 /* Got a format specifier */
3902 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003903 int width = -1;
3904 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003905 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003906 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003907 PyObject *v = NULL;
3908 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003909 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003910 int sign;
3911 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003912 char formatbuf[FORMATBUFLEN];
3913 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003914#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003915 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003916 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003917#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003918
Guido van Rossumda9c2711996-12-05 21:58:58 +00003919 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003920 if (*fmt == '(') {
3921 char *keystart;
3922 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003923 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003924 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003925
3926 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003927 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003928 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003929 goto error;
3930 }
3931 ++fmt;
3932 --fmtcnt;
3933 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003934 /* Skip over balanced parentheses */
3935 while (pcount > 0 && --fmtcnt >= 0) {
3936 if (*fmt == ')')
3937 --pcount;
3938 else if (*fmt == '(')
3939 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003940 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003941 }
3942 keylen = fmt - keystart - 1;
3943 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003944 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003945 "incomplete format key");
3946 goto error;
3947 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003948 key = PyString_FromStringAndSize(keystart,
3949 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003950 if (key == NULL)
3951 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003952 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003953 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003954 args_owned = 0;
3955 }
3956 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003957 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003958 if (args == NULL) {
3959 goto error;
3960 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003961 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003962 arglen = -1;
3963 argidx = -2;
3964 }
Guido van Rossume5372401993-03-16 12:15:04 +00003965 while (--fmtcnt >= 0) {
3966 switch (c = *fmt++) {
3967 case '-': flags |= F_LJUST; continue;
3968 case '+': flags |= F_SIGN; continue;
3969 case ' ': flags |= F_BLANK; continue;
3970 case '#': flags |= F_ALT; continue;
3971 case '0': flags |= F_ZERO; continue;
3972 }
3973 break;
3974 }
3975 if (c == '*') {
3976 v = getnextarg(args, arglen, &argidx);
3977 if (v == NULL)
3978 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003979 if (!PyInt_Check(v)) {
3980 PyErr_SetString(PyExc_TypeError,
3981 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003982 goto error;
3983 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003984 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003985 if (width < 0) {
3986 flags |= F_LJUST;
3987 width = -width;
3988 }
Guido van Rossume5372401993-03-16 12:15:04 +00003989 if (--fmtcnt >= 0)
3990 c = *fmt++;
3991 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003992 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003993 width = c - '0';
3994 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003995 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003996 if (!isdigit(c))
3997 break;
3998 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003999 PyErr_SetString(
4000 PyExc_ValueError,
4001 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004002 goto error;
4003 }
4004 width = width*10 + (c - '0');
4005 }
4006 }
4007 if (c == '.') {
4008 prec = 0;
4009 if (--fmtcnt >= 0)
4010 c = *fmt++;
4011 if (c == '*') {
4012 v = getnextarg(args, arglen, &argidx);
4013 if (v == NULL)
4014 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004015 if (!PyInt_Check(v)) {
4016 PyErr_SetString(
4017 PyExc_TypeError,
4018 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004019 goto error;
4020 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004021 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004022 if (prec < 0)
4023 prec = 0;
4024 if (--fmtcnt >= 0)
4025 c = *fmt++;
4026 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004027 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004028 prec = c - '0';
4029 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004030 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004031 if (!isdigit(c))
4032 break;
4033 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004034 PyErr_SetString(
4035 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004036 "prec too big");
4037 goto error;
4038 }
4039 prec = prec*10 + (c - '0');
4040 }
4041 }
4042 } /* prec */
4043 if (fmtcnt >= 0) {
4044 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004045 if (--fmtcnt >= 0)
4046 c = *fmt++;
4047 }
4048 }
4049 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004050 PyErr_SetString(PyExc_ValueError,
4051 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004052 goto error;
4053 }
4054 if (c != '%') {
4055 v = getnextarg(args, arglen, &argidx);
4056 if (v == NULL)
4057 goto error;
4058 }
4059 sign = 0;
4060 fill = ' ';
4061 switch (c) {
4062 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004063 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004064 len = 1;
4065 break;
4066 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004067#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004068 if (PyUnicode_Check(v)) {
4069 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004070 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004071 goto unicode;
4072 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004073#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004074 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004075 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004076 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004077 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004078 else
4079 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004080 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004081 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004082 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004083 /* XXX Note: this should never happen,
4084 since PyObject_Repr() and
4085 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004086 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004087 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004088 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004089 goto error;
4090 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004091 pbuf = PyString_AS_STRING(temp);
4092 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004093 if (prec >= 0 && len > prec)
4094 len = prec;
4095 break;
4096 case 'i':
4097 case 'd':
4098 case 'u':
4099 case 'o':
4100 case 'x':
4101 case 'X':
4102 if (c == 'i')
4103 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004104 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004105 temp = _PyString_FormatLong(v, flags,
4106 prec, c, &pbuf, &len);
4107 if (!temp)
4108 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004109 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004110 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004111 else {
4112 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004113 len = formatint(pbuf,
4114 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004115 flags, prec, c, v);
4116 if (len < 0)
4117 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004118 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004119 }
4120 if (flags & F_ZERO)
4121 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004122 break;
4123 case 'e':
4124 case 'E':
4125 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004126 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004127 case 'g':
4128 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004129 if (c == 'F')
4130 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004131 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004132 len = formatfloat(pbuf, sizeof(formatbuf),
4133 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004134 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004135 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004136 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004137 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004138 fill = '0';
4139 break;
4140 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004141#ifdef Py_USING_UNICODE
4142 if (PyUnicode_Check(v)) {
4143 fmt = fmt_start;
4144 argidx = argidx_start;
4145 goto unicode;
4146 }
4147#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004148 pbuf = formatbuf;
4149 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004150 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004151 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004152 break;
4153 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004154 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004155 "unsupported format character '%c' (0x%x) "
4156 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004157 c, c,
4158 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004159 goto error;
4160 }
4161 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004162 if (*pbuf == '-' || *pbuf == '+') {
4163 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004164 len--;
4165 }
4166 else if (flags & F_SIGN)
4167 sign = '+';
4168 else if (flags & F_BLANK)
4169 sign = ' ';
4170 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004171 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004172 }
4173 if (width < len)
4174 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004175 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004176 reslen -= rescnt;
4177 rescnt = width + fmtcnt + 100;
4178 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004179 if (reslen < 0) {
4180 Py_DECREF(result);
4181 return PyErr_NoMemory();
4182 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004183 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004184 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004185 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004186 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004187 }
4188 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004189 if (fill != ' ')
4190 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004191 rescnt--;
4192 if (width > len)
4193 width--;
4194 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004195 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4196 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004197 assert(pbuf[1] == c);
4198 if (fill != ' ') {
4199 *res++ = *pbuf++;
4200 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004201 }
Tim Petersfff53252001-04-12 18:38:48 +00004202 rescnt -= 2;
4203 width -= 2;
4204 if (width < 0)
4205 width = 0;
4206 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004207 }
4208 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004209 do {
4210 --rescnt;
4211 *res++ = fill;
4212 } while (--width > len);
4213 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004214 if (fill == ' ') {
4215 if (sign)
4216 *res++ = sign;
4217 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004218 (c == 'x' || c == 'X')) {
4219 assert(pbuf[0] == '0');
4220 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004221 *res++ = *pbuf++;
4222 *res++ = *pbuf++;
4223 }
4224 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004225 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004226 res += len;
4227 rescnt -= len;
4228 while (--width >= len) {
4229 --rescnt;
4230 *res++ = ' ';
4231 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004232 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004233 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004234 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004235 goto error;
4236 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004237 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004238 } /* '%' */
4239 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004240 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004241 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004242 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004243 goto error;
4244 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004245 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004246 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004247 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004248 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004249 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004250
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004251#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004252 unicode:
4253 if (args_owned) {
4254 Py_DECREF(args);
4255 args_owned = 0;
4256 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004257 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004258 if (PyTuple_Check(orig_args) && argidx > 0) {
4259 PyObject *v;
4260 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4261 v = PyTuple_New(n);
4262 if (v == NULL)
4263 goto error;
4264 while (--n >= 0) {
4265 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4266 Py_INCREF(w);
4267 PyTuple_SET_ITEM(v, n, w);
4268 }
4269 args = v;
4270 } else {
4271 Py_INCREF(orig_args);
4272 args = orig_args;
4273 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004274 args_owned = 1;
4275 /* Take what we have of the result and let the Unicode formatting
4276 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004277 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004278 if (_PyString_Resize(&result, rescnt))
4279 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004280 fmtcnt = PyString_GET_SIZE(format) - \
4281 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004282 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4283 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004284 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004285 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004286 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004287 if (v == NULL)
4288 goto error;
4289 /* Paste what we have (result) to what the Unicode formatting
4290 function returned (v) and return the result (or error) */
4291 w = PyUnicode_Concat(result, v);
4292 Py_DECREF(result);
4293 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004294 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004295 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004296#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004297
Guido van Rossume5372401993-03-16 12:15:04 +00004298 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004299 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004300 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004301 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004302 }
Guido van Rossume5372401993-03-16 12:15:04 +00004303 return NULL;
4304}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004305
Guido van Rossum2a61e741997-01-18 07:55:05 +00004306void
Fred Drakeba096332000-07-09 07:04:36 +00004307PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004308{
4309 register PyStringObject *s = (PyStringObject *)(*p);
4310 PyObject *t;
4311 if (s == NULL || !PyString_Check(s))
4312 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004313 /* If it's a string subclass, we don't really know what putting
4314 it in the interned dict might do. */
4315 if (!PyString_CheckExact(s))
4316 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004317 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004318 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004319 if (interned == NULL) {
4320 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004321 if (interned == NULL) {
4322 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004323 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004324 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004325 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004326 t = PyDict_GetItem(interned, (PyObject *)s);
4327 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004328 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004329 Py_DECREF(*p);
4330 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004331 return;
4332 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004333
Armin Rigo79f7ad22004-08-07 19:27:39 +00004334 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004335 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004336 return;
4337 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004338 /* The two references in interned are not counted by refcnt.
4339 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004340 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004341 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004342}
4343
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004344void
4345PyString_InternImmortal(PyObject **p)
4346{
4347 PyString_InternInPlace(p);
4348 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4349 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4350 Py_INCREF(*p);
4351 }
4352}
4353
Guido van Rossum2a61e741997-01-18 07:55:05 +00004354
4355PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004356PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004357{
4358 PyObject *s = PyString_FromString(cp);
4359 if (s == NULL)
4360 return NULL;
4361 PyString_InternInPlace(&s);
4362 return s;
4363}
4364
Guido van Rossum8cf04761997-08-02 02:57:45 +00004365void
Fred Drakeba096332000-07-09 07:04:36 +00004366PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004367{
4368 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004369 for (i = 0; i < UCHAR_MAX + 1; i++) {
4370 Py_XDECREF(characters[i]);
4371 characters[i] = NULL;
4372 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004373 Py_XDECREF(nullstring);
4374 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004375}
Barry Warsawa903ad982001-02-23 16:40:48 +00004376
Barry Warsawa903ad982001-02-23 16:40:48 +00004377void _Py_ReleaseInternedStrings(void)
4378{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004379 PyObject *keys;
4380 PyStringObject *s;
4381 int i, n;
4382
4383 if (interned == NULL || !PyDict_Check(interned))
4384 return;
4385 keys = PyDict_Keys(interned);
4386 if (keys == NULL || !PyList_Check(keys)) {
4387 PyErr_Clear();
4388 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004389 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004390
4391 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4392 detector, interned strings are not forcibly deallocated; rather, we
4393 give them their stolen references back, and then clear and DECREF
4394 the interned dict. */
4395
4396 fprintf(stderr, "releasing interned strings\n");
4397 n = PyList_GET_SIZE(keys);
4398 for (i = 0; i < n; i++) {
4399 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4400 switch (s->ob_sstate) {
4401 case SSTATE_NOT_INTERNED:
4402 /* XXX Shouldn't happen */
4403 break;
4404 case SSTATE_INTERNED_IMMORTAL:
4405 s->ob_refcnt += 1;
4406 break;
4407 case SSTATE_INTERNED_MORTAL:
4408 s->ob_refcnt += 2;
4409 break;
4410 default:
4411 Py_FatalError("Inconsistent interned string state.");
4412 }
4413 s->ob_sstate = SSTATE_NOT_INTERNED;
4414 }
4415 Py_DECREF(keys);
4416 PyDict_Clear(interned);
4417 Py_DECREF(interned);
4418 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004419}