blob: e29ed4806fe449bbb500a2ef81110c53670ad1a8 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
/* Hit counters for the shared-object fast paths in the string
   constructors: null_strings is bumped when the cached empty string is
   handed out, one_strings when a cached one-character string is.  Only
   compiled in when COUNT_ALLOCS is defined. */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
/* Cache of one-character string objects, indexed by the byte value masked
   with UCHAR_MAX.  A slot stays NULL until the constructors first create
   that one-character string. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string object; NULL until the first empty string is
   created by one of the constructors. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromStringAndSize(), the parameter the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000072 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000073 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000074 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000076 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000078 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (str != NULL)
80 memcpy(op->ob_sval, str, size);
81 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000082 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000090 PyObject *t = (PyObject *)op;
91 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000092 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097}
98
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000100PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000101{
Tim Peters62de65b2001-12-06 20:29:32 +0000102 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000103 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000104
105 assert(str != NULL);
106 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000107 if (size > INT_MAX) {
108 PyErr_SetString(PyExc_OverflowError,
109 "string is too long for a Python string");
110 return NULL;
111 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0 && (op = nullstring) != NULL) {
113#ifdef COUNT_ALLOCS
114 null_strings++;
115#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000116 Py_INCREF(op);
117 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
119 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
120#ifdef COUNT_ALLOCS
121 one_strings++;
122#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000127 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000128 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000133 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000134 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000135 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000137 PyObject *t = (PyObject *)op;
138 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000139 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Barry Warsawdadace02001-08-24 18:32:06 +0000152PyObject *
153PyString_FromFormatV(const char *format, va_list vargs)
154{
Tim Petersc15c4f12001-10-02 21:32:07 +0000155 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 int n = 0;
157 const char* f;
158 char *s;
159 PyObject* string;
160
Tim Petersc15c4f12001-10-02 21:32:07 +0000161#ifdef VA_LIST_IS_ARRAY
162 memcpy(count, vargs, sizeof(va_list));
163#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000164#ifdef __va_copy
165 __va_copy(count, vargs);
166#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000167 count = vargs;
168#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000169#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000170 /* step 1: figure out how large a buffer we need */
171 for (f = format; *f; f++) {
172 if (*f == '%') {
173 const char* p = f;
174 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
175 ;
176
177 /* skip the 'l' in %ld, since it doesn't change the
178 width. although only %d is supported (see
179 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000180 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000181 if (*f == 'l' && *(f+1) == 'd')
182 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000183
Barry Warsawdadace02001-08-24 18:32:06 +0000184 switch (*f) {
185 case 'c':
186 (void)va_arg(count, int);
187 /* fall through... */
188 case '%':
189 n++;
190 break;
191 case 'd': case 'i': case 'x':
192 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000193 /* 20 bytes is enough to hold a 64-bit
194 integer. Decimal takes the most space.
195 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000196 n += 20;
197 break;
198 case 's':
199 s = va_arg(count, char*);
200 n += strlen(s);
201 break;
202 case 'p':
203 (void) va_arg(count, int);
204 /* maximum 64-bit pointer representation:
205 * 0xffffffffffffffff
206 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000207 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000217 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000226 /* Since we've analyzed how much space we need for the worst case,
227 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000228 string = PyString_FromStringAndSize(NULL, n);
229 if (!string)
230 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000231
Barry Warsawdadace02001-08-24 18:32:06 +0000232 s = PyString_AsString(string);
233
234 for (f = format; *f; f++) {
235 if (*f == '%') {
236 const char* p = f++;
237 int i, longflag = 0;
238 /* parse the width.precision part (we're only
239 interested in the precision value, if any) */
240 n = 0;
241 while (isdigit(Py_CHARMASK(*f)))
242 n = (n*10) + *f++ - '0';
243 if (*f == '.') {
244 f++;
245 n = 0;
246 while (isdigit(Py_CHARMASK(*f)))
247 n = (n*10) + *f++ - '0';
248 }
249 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
250 f++;
251 /* handle the long flag, but only for %ld. others
252 can be added when necessary. */
253 if (*f == 'l' && *(f+1) == 'd') {
254 longflag = 1;
255 ++f;
256 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000257
Barry Warsawdadace02001-08-24 18:32:06 +0000258 switch (*f) {
259 case 'c':
260 *s++ = va_arg(vargs, int);
261 break;
262 case 'd':
263 if (longflag)
264 sprintf(s, "%ld", va_arg(vargs, long));
265 else
266 sprintf(s, "%d", va_arg(vargs, int));
267 s += strlen(s);
268 break;
269 case 'i':
270 sprintf(s, "%i", va_arg(vargs, int));
271 s += strlen(s);
272 break;
273 case 'x':
274 sprintf(s, "%x", va_arg(vargs, int));
275 s += strlen(s);
276 break;
277 case 's':
278 p = va_arg(vargs, char*);
279 i = strlen(p);
280 if (n > 0 && i > n)
281 i = n;
282 memcpy(s, p, i);
283 s += i;
284 break;
285 case 'p':
286 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000287 /* %p is ill-defined: ensure leading 0x. */
288 if (s[1] == 'X')
289 s[1] = 'x';
290 else if (s[1] != 'x') {
291 memmove(s+2, s, strlen(s)+1);
292 s[0] = '0';
293 s[1] = 'x';
294 }
Barry Warsawdadace02001-08-24 18:32:06 +0000295 s += strlen(s);
296 break;
297 case '%':
298 *s++ = '%';
299 break;
300 default:
301 strcpy(s, p);
302 s += strlen(s);
303 goto end;
304 }
305 } else
306 *s++ = *f;
307 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000308
Barry Warsawdadace02001-08-24 18:32:06 +0000309 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000311 return string;
312}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000313
Barry Warsawdadace02001-08-24 18:32:06 +0000314PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000315PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000316{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000317 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000318 va_list vargs;
319
320#ifdef HAVE_STDARG_PROTOTYPES
321 va_start(vargs, format);
322#else
323 va_start(vargs);
324#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000325 ret = PyString_FromFormatV(format, vargs);
326 va_end(vargs);
327 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000328}
329
330
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000331PyObject *PyString_Decode(const char *s,
332 int size,
333 const char *encoding,
334 const char *errors)
335{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000336 PyObject *v, *str;
337
338 str = PyString_FromStringAndSize(s, size);
339 if (str == NULL)
340 return NULL;
341 v = PyString_AsDecodedString(str, encoding, errors);
342 Py_DECREF(str);
343 return v;
344}
345
346PyObject *PyString_AsDecodedObject(PyObject *str,
347 const char *encoding,
348 const char *errors)
349{
350 PyObject *v;
351
352 if (!PyString_Check(str)) {
353 PyErr_BadArgument();
354 goto onError;
355 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000356
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000357 if (encoding == NULL) {
358#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000360#else
361 PyErr_SetString(PyExc_ValueError, "no encoding specified");
362 goto onError;
363#endif
364 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365
366 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000367 v = PyCodec_Decode(str, encoding, errors);
368 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000370
371 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000372
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000373 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000374 return NULL;
375}
376
377PyObject *PyString_AsDecodedString(PyObject *str,
378 const char *encoding,
379 const char *errors)
380{
381 PyObject *v;
382
383 v = PyString_AsDecodedObject(str, encoding, errors);
384 if (v == NULL)
385 goto onError;
386
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 /* Convert Unicode to a string using the default encoding */
389 if (PyUnicode_Check(v)) {
390 PyObject *temp = v;
391 v = PyUnicode_AsEncodedString(v, NULL, NULL);
392 Py_DECREF(temp);
393 if (v == NULL)
394 goto onError;
395 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000396#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 if (!PyString_Check(v)) {
398 PyErr_Format(PyExc_TypeError,
399 "decoder did not return a string object (type=%.400s)",
400 v->ob_type->tp_name);
401 Py_DECREF(v);
402 goto onError;
403 }
404
405 return v;
406
407 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000408 return NULL;
409}
410
411PyObject *PyString_Encode(const char *s,
412 int size,
413 const char *encoding,
414 const char *errors)
415{
416 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000417
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 str = PyString_FromStringAndSize(s, size);
419 if (str == NULL)
420 return NULL;
421 v = PyString_AsEncodedString(str, encoding, errors);
422 Py_DECREF(str);
423 return v;
424}
425
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000426PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 const char *encoding,
428 const char *errors)
429{
430 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000431
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 if (!PyString_Check(str)) {
433 PyErr_BadArgument();
434 goto onError;
435 }
436
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000437 if (encoding == NULL) {
438#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000439 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000440#else
441 PyErr_SetString(PyExc_ValueError, "no encoding specified");
442 goto onError;
443#endif
444 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445
446 /* Encode via the codec registry */
447 v = PyCodec_Encode(str, encoding, errors);
448 if (v == NULL)
449 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450
451 return v;
452
453 onError:
454 return NULL;
455}
456
457PyObject *PyString_AsEncodedString(PyObject *str,
458 const char *encoding,
459 const char *errors)
460{
461 PyObject *v;
462
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000463 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000464 if (v == NULL)
465 goto onError;
466
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 /* Convert Unicode to a string using the default encoding */
469 if (PyUnicode_Check(v)) {
470 PyObject *temp = v;
471 v = PyUnicode_AsEncodedString(v, NULL, NULL);
472 Py_DECREF(temp);
473 if (v == NULL)
474 goto onError;
475 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000476#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000477 if (!PyString_Check(v)) {
478 PyErr_Format(PyExc_TypeError,
479 "encoder did not return a string object (type=%.400s)",
480 v->ob_type->tp_name);
481 Py_DECREF(v);
482 goto onError;
483 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000484
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000485 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 onError:
488 return NULL;
489}
490
Guido van Rossum234f9421993-06-17 12:35:49 +0000491static void
Fred Drakeba096332000-07-09 07:04:36 +0000492string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000493{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000494 switch (PyString_CHECK_INTERNED(op)) {
495 case SSTATE_NOT_INTERNED:
496 break;
497
498 case SSTATE_INTERNED_MORTAL:
499 /* revive dead object temporarily for DelItem */
500 op->ob_refcnt = 3;
501 if (PyDict_DelItem(interned, op) != 0)
502 Py_FatalError(
503 "deletion of interned string failed");
504 break;
505
506 case SSTATE_INTERNED_IMMORTAL:
507 Py_FatalError("Immortal interned string died.");
508
509 default:
510 Py_FatalError("Inconsistent interned string state.");
511 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000512 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000513}
514
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000515/* Unescape a backslash-escaped string. If unicode is non-zero,
516 the string is a u-literal. If recode_encoding is non-zero,
517 the string is UTF-8 encoded and should be re-encoded in the
518 specified encoding. */
519
520PyObject *PyString_DecodeEscape(const char *s,
521 int len,
522 const char *errors,
523 int unicode,
524 const char *recode_encoding)
525{
526 int c;
527 char *p, *buf;
528 const char *end;
529 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000530 int newlen = recode_encoding ? 4*len:len;
531 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000532 if (v == NULL)
533 return NULL;
534 p = buf = PyString_AsString(v);
535 end = s + len;
536 while (s < end) {
537 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000538 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539#ifdef Py_USING_UNICODE
540 if (recode_encoding && (*s & 0x80)) {
541 PyObject *u, *w;
542 char *r;
543 const char* t;
544 int rn;
545 t = s;
546 /* Decode non-ASCII bytes as UTF-8. */
547 while (t < end && (*t & 0x80)) t++;
548 u = PyUnicode_DecodeUTF8(s, t - s, errors);
549 if(!u) goto failed;
550
551 /* Recode them in target encoding. */
552 w = PyUnicode_AsEncodedString(
553 u, recode_encoding, errors);
554 Py_DECREF(u);
555 if (!w) goto failed;
556
557 /* Append bytes to output buffer. */
558 r = PyString_AsString(w);
559 rn = PyString_Size(w);
560 memcpy(p, r, rn);
561 p += rn;
562 Py_DECREF(w);
563 s = t;
564 } else {
565 *p++ = *s++;
566 }
567#else
568 *p++ = *s++;
569#endif
570 continue;
571 }
572 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000573 if (s==end) {
574 PyErr_SetString(PyExc_ValueError,
575 "Trailing \\ in string");
576 goto failed;
577 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000578 switch (*s++) {
579 /* XXX This assumes ASCII! */
580 case '\n': break;
581 case '\\': *p++ = '\\'; break;
582 case '\'': *p++ = '\''; break;
583 case '\"': *p++ = '\"'; break;
584 case 'b': *p++ = '\b'; break;
585 case 'f': *p++ = '\014'; break; /* FF */
586 case 't': *p++ = '\t'; break;
587 case 'n': *p++ = '\n'; break;
588 case 'r': *p++ = '\r'; break;
589 case 'v': *p++ = '\013'; break; /* VT */
590 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
591 case '0': case '1': case '2': case '3':
592 case '4': case '5': case '6': case '7':
593 c = s[-1] - '0';
594 if ('0' <= *s && *s <= '7') {
595 c = (c<<3) + *s++ - '0';
596 if ('0' <= *s && *s <= '7')
597 c = (c<<3) + *s++ - '0';
598 }
599 *p++ = c;
600 break;
601 case 'x':
602 if (isxdigit(Py_CHARMASK(s[0]))
603 && isxdigit(Py_CHARMASK(s[1]))) {
604 unsigned int x = 0;
605 c = Py_CHARMASK(*s);
606 s++;
607 if (isdigit(c))
608 x = c - '0';
609 else if (islower(c))
610 x = 10 + c - 'a';
611 else
612 x = 10 + c - 'A';
613 x = x << 4;
614 c = Py_CHARMASK(*s);
615 s++;
616 if (isdigit(c))
617 x += c - '0';
618 else if (islower(c))
619 x += 10 + c - 'a';
620 else
621 x += 10 + c - 'A';
622 *p++ = x;
623 break;
624 }
625 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 PyErr_SetString(PyExc_ValueError,
627 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000628 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000629 }
630 if (strcmp(errors, "replace") == 0) {
631 *p++ = '?';
632 } else if (strcmp(errors, "ignore") == 0)
633 /* do nothing */;
634 else {
635 PyErr_Format(PyExc_ValueError,
636 "decoding error; "
637 "unknown error handling code: %.400s",
638 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000639 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000640 }
641#ifndef Py_USING_UNICODE
642 case 'u':
643 case 'U':
644 case 'N':
645 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000646 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000647 "Unicode escapes not legal "
648 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000649 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000650 }
651#endif
652 default:
653 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000654 s--;
655 goto non_esc; /* an arbitry number of unescaped
656 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000657 }
658 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000659 if (p-buf < newlen)
660 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000661 return v;
662 failed:
663 Py_DECREF(v);
664 return NULL;
665}
666
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000667static int
668string_getsize(register PyObject *op)
669{
670 char *s;
671 int len;
672 if (PyString_AsStringAndSize(op, &s, &len))
673 return -1;
674 return len;
675}
676
677static /*const*/ char *
678string_getbuffer(register PyObject *op)
679{
680 char *s;
681 int len;
682 if (PyString_AsStringAndSize(op, &s, &len))
683 return NULL;
684 return s;
685}
686
Guido van Rossumd7047b31995-01-02 19:07:15 +0000687int
Fred Drakeba096332000-07-09 07:04:36 +0000688PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000690 if (!PyString_Check(op))
691 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693}
694
695/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000696PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (!PyString_Check(op))
699 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000700 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701}
702
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000703int
704PyString_AsStringAndSize(register PyObject *obj,
705 register char **s,
706 register int *len)
707{
708 if (s == NULL) {
709 PyErr_BadInternalCall();
710 return -1;
711 }
712
713 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000714#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715 if (PyUnicode_Check(obj)) {
716 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
717 if (obj == NULL)
718 return -1;
719 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000720 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000721#endif
722 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723 PyErr_Format(PyExc_TypeError,
724 "expected string or Unicode object, "
725 "%.200s found", obj->ob_type->tp_name);
726 return -1;
727 }
728 }
729
730 *s = PyString_AS_STRING(obj);
731 if (len != NULL)
732 *len = PyString_GET_SIZE(obj);
733 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
734 PyErr_SetString(PyExc_TypeError,
735 "expected string without null bytes");
736 return -1;
737 }
738 return 0;
739}
740
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741/* Methods */
742
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000743static int
Fred Drakeba096332000-07-09 07:04:36 +0000744string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745{
746 int i;
747 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000748 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000749
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000750 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000751 if (! PyString_CheckExact(op)) {
752 int ret;
753 /* A str subclass may have its own __str__ method. */
754 op = (PyStringObject *) PyObject_Str((PyObject *)op);
755 if (op == NULL)
756 return -1;
757 ret = string_print(op, fp, flags);
758 Py_DECREF(op);
759 return ret;
760 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000761 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000762#ifdef __VMS
763 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
764#else
765 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
766#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000767 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000769
Thomas Wouters7e474022000-07-16 12:04:32 +0000770 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000772 if (memchr(op->ob_sval, '\'', op->ob_size) &&
773 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000774 quote = '"';
775
776 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 for (i = 0; i < op->ob_size; i++) {
778 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000779 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000780 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000781 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\r");
787 else if (c < ' ' || c >= 0x7f)
788 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000789 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794}
795
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000796PyObject *
797PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000799 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000800 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000801 PyObject *v;
802 if (newsize > INT_MAX) {
803 PyErr_SetString(PyExc_OverflowError,
804 "string is too large to make repr");
805 }
806 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000808 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 }
810 else {
811 register int i;
812 register char c;
813 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 int quote;
815
Thomas Wouters7e474022000-07-16 12:04:32 +0000816 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000818 if (smartquotes &&
819 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000820 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000821 quote = '"';
822
Tim Peters9161c8b2001-12-03 01:55:38 +0000823 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000826 /* There's at least enough room for a hex escape
827 and a closing quote. */
828 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000832 else if (c == '\t')
833 *p++ = '\\', *p++ = 't';
834 else if (c == '\n')
835 *p++ = '\\', *p++ = 'n';
836 else if (c == '\r')
837 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000838 else if (c < ' ' || c >= 0x7f) {
839 /* For performance, we don't want to call
840 PyOS_snprintf here (extra layers of
841 function call). */
842 sprintf(p, "\\x%02x", c & 0xff);
843 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000844 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000845 else
846 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000848 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000851 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000852 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000853 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855}
856
Guido van Rossum189f1df2001-05-01 16:51:53 +0000857static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858string_repr(PyObject *op)
859{
860 return PyString_Repr(op, 1);
861}
862
863static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000864string_str(PyObject *s)
865{
Tim Petersc9933152001-10-16 20:18:24 +0000866 assert(PyString_Check(s));
867 if (PyString_CheckExact(s)) {
868 Py_INCREF(s);
869 return s;
870 }
871 else {
872 /* Subtype -- return genuine string with the same value. */
873 PyStringObject *t = (PyStringObject *) s;
874 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
875 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000876}
877
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878static int
Fred Drakeba096332000-07-09 07:04:36 +0000879string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880{
881 return a->ob_size;
882}
883
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000884static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000885string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000888 register PyStringObject *op;
889 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000890#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 if (PyUnicode_Check(bb))
892 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000893#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000894 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000895 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000896 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 return NULL;
898 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000899#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000901 if ((a->ob_size == 0 || b->ob_size == 0) &&
902 PyString_CheckExact(a) && PyString_CheckExact(b)) {
903 if (a->ob_size == 0) {
904 Py_INCREF(bb);
905 return bb;
906 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 Py_INCREF(a);
908 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
910 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000911 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000912 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000915 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000917 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
919 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
920 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922#undef b
923}
924
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000926string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000929 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000930 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000932 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933 if (n < 0)
934 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000935 /* watch out for overflows: the size can overflow int,
936 * and the # of bytes needed can overflow size_t
937 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000938 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000939 if (n && size / n != a->ob_size) {
940 PyErr_SetString(PyExc_OverflowError,
941 "repeated string is too long");
942 return NULL;
943 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000944 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 Py_INCREF(a);
946 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947 }
Tim Peterse7c05322004-06-27 17:24:49 +0000948 nbytes = (size_t)size;
949 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000950 PyErr_SetString(PyExc_OverflowError,
951 "repeated string is too long");
952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000955 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000961 op->ob_sval[size] = '\0';
962 if (a->ob_size == 1 && n > 0) {
963 memset(op->ob_sval, a->ob_sval[0] , n);
964 return (PyObject *) op;
965 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000966 i = 0;
967 if (i < size) {
968 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
969 i = (int) a->ob_size;
970 }
971 while (i < size) {
972 j = (i <= size-i) ? i : size-i;
973 memcpy(op->ob_sval+i, op->ob_sval, j);
974 i += j;
975 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977}
978
979/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
980
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000982string_slice(register PyStringObject *a, register int i, register int j)
983 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984{
985 if (i < 0)
986 i = 0;
987 if (j < 0)
988 j = 0; /* Avoid signed/unsigned bug in next line */
989 if (j > a->ob_size)
990 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000991 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
992 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 Py_INCREF(a);
994 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000995 }
996 if (j < i)
997 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999}
1000
Guido van Rossum9284a572000-03-07 15:53:43 +00001001static int
Fred Drakeba096332000-07-09 07:04:36 +00001002string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001003{
Barry Warsaw817918c2002-08-06 16:58:21 +00001004 const char *lhs, *rhs, *end;
1005 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001006
1007 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001008#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001009 if (PyUnicode_Check(el))
1010 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001011#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001012 if (!PyString_Check(el)) {
1013 PyErr_SetString(PyExc_TypeError,
1014 "'in <string>' requires string as left operand");
1015 return -1;
1016 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001017 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001018 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001019 rhs = PyString_AS_STRING(el);
1020 lhs = PyString_AS_STRING(a);
1021
1022 /* optimize for a single character */
1023 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001024 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001025
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 while (lhs <= end) {
1028 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001029 return 1;
1030 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001031
Guido van Rossum9284a572000-03-07 15:53:43 +00001032 return 0;
1033}
1034
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001035static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001036string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001037{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001039 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001041 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042 return NULL;
1043 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001044 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001045 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001046 if (v == NULL)
1047 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001048 else {
1049#ifdef COUNT_ALLOCS
1050 one_strings++;
1051#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001052 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001053 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001054 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001055}
1056
Martin v. Löwiscd353062001-05-24 16:56:35 +00001057static PyObject*
1058string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001060 int c;
1061 int len_a, len_b;
1062 int min_len;
1063 PyObject *result;
1064
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001065 /* Make sure both arguments are strings. */
1066 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001067 result = Py_NotImplemented;
1068 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001069 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001070 if (a == b) {
1071 switch (op) {
1072 case Py_EQ:case Py_LE:case Py_GE:
1073 result = Py_True;
1074 goto out;
1075 case Py_NE:case Py_LT:case Py_GT:
1076 result = Py_False;
1077 goto out;
1078 }
1079 }
1080 if (op == Py_EQ) {
1081 /* Supporting Py_NE here as well does not save
1082 much time, since Py_NE is rarely used. */
1083 if (a->ob_size == b->ob_size
1084 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001085 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001086 a->ob_size) == 0)) {
1087 result = Py_True;
1088 } else {
1089 result = Py_False;
1090 }
1091 goto out;
1092 }
1093 len_a = a->ob_size; len_b = b->ob_size;
1094 min_len = (len_a < len_b) ? len_a : len_b;
1095 if (min_len > 0) {
1096 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1097 if (c==0)
1098 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1099 }else
1100 c = 0;
1101 if (c == 0)
1102 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1103 switch (op) {
1104 case Py_LT: c = c < 0; break;
1105 case Py_LE: c = c <= 0; break;
1106 case Py_EQ: assert(0); break; /* unreachable */
1107 case Py_NE: c = c != 0; break;
1108 case Py_GT: c = c > 0; break;
1109 case Py_GE: c = c >= 0; break;
1110 default:
1111 result = Py_NotImplemented;
1112 goto out;
1113 }
1114 result = c ? Py_True : Py_False;
1115 out:
1116 Py_INCREF(result);
1117 return result;
1118}
1119
1120int
1121_PyString_Eq(PyObject *o1, PyObject *o2)
1122{
1123 PyStringObject *a, *b;
1124 a = (PyStringObject*)o1;
1125 b = (PyStringObject*)o2;
1126 return a->ob_size == b->ob_size
1127 && *a->ob_sval == *b->ob_sval
1128 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001129}
1130
Guido van Rossum9bfef441993-03-29 10:43:31 +00001131static long
Fred Drakeba096332000-07-09 07:04:36 +00001132string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001133{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001134 register int len;
1135 register unsigned char *p;
1136 register long x;
1137
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001138 if (a->ob_shash != -1)
1139 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001140 len = a->ob_size;
1141 p = (unsigned char *) a->ob_sval;
1142 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001143 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001144 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145 x ^= a->ob_size;
1146 if (x == -1)
1147 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001149 return x;
1150}
1151
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001152static PyObject*
1153string_subscript(PyStringObject* self, PyObject* item)
1154{
1155 if (PyInt_Check(item)) {
1156 long i = PyInt_AS_LONG(item);
1157 if (i < 0)
1158 i += PyString_GET_SIZE(self);
1159 return string_item(self,i);
1160 }
1161 else if (PyLong_Check(item)) {
1162 long i = PyLong_AsLong(item);
1163 if (i == -1 && PyErr_Occurred())
1164 return NULL;
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PySlice_Check(item)) {
1170 int start, stop, step, slicelength, cur, i;
1171 char* source_buf;
1172 char* result_buf;
1173 PyObject* result;
1174
1175 if (PySlice_GetIndicesEx((PySliceObject*)item,
1176 PyString_GET_SIZE(self),
1177 &start, &stop, &step, &slicelength) < 0) {
1178 return NULL;
1179 }
1180
1181 if (slicelength <= 0) {
1182 return PyString_FromStringAndSize("", 0);
1183 }
1184 else {
1185 source_buf = PyString_AsString((PyObject*)self);
1186 result_buf = PyMem_Malloc(slicelength);
1187
1188 for (cur = start, i = 0; i < slicelength;
1189 cur += step, i++) {
1190 result_buf[i] = source_buf[cur];
1191 }
1192
1193 result = PyString_FromStringAndSize(result_buf,
1194 slicelength);
1195 PyMem_Free(result_buf);
1196 return result;
1197 }
1198 }
1199 else {
1200 PyErr_SetString(PyExc_TypeError,
1201 "string indices must be integers");
1202 return NULL;
1203 }
1204}
1205
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001206static int
Fred Drakeba096332000-07-09 07:04:36 +00001207string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001208{
1209 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001210 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001211 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212 return -1;
1213 }
1214 *ptr = (void *)self->ob_sval;
1215 return self->ob_size;
1216}
1217
1218static int
Fred Drakeba096332000-07-09 07:04:36 +00001219string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220{
Guido van Rossum045e6881997-09-08 18:30:11 +00001221 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001222 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001223 return -1;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
1229 if ( lenp )
1230 *lenp = self->ob_size;
1231 return 1;
1232}
1233
Guido van Rossum1db70701998-10-08 02:18:52 +00001234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001236{
1237 if ( index != 0 ) {
1238 PyErr_SetString(PyExc_SystemError,
1239 "accessing non-existent string segment");
1240 return -1;
1241 }
1242 *ptr = self->ob_sval;
1243 return self->ob_size;
1244}
1245
/* Sequence method table for strings.  Length, concatenation, repetition,
   indexing, slicing and containment are wired to the string_* functions
   defined earlier in this file; the item- and slice-assignment slots are
   left empty (0). */
static PySequenceMethods string_as_sequence = {
	(inquiry)string_length, /*sq_length*/
	(binaryfunc)string_concat, /*sq_concat*/
	(intargfunc)string_repeat, /*sq_repeat*/
	(intargfunc)string_item, /*sq_item*/
	(intintargfunc)string_slice, /*sq_slice*/
	0,		/*sq_ass_item*/
	0,		/*sq_ass_slice*/
	(objobjproc)string_contains /*sq_contains*/
};
1256
/* Mapping method table for strings: length comes from string_length and
   subscripting from string_subscript (which handles int, long and slice
   items); the third slot is left empty (0). */
static PyMappingMethods string_as_mapping = {
	(inquiry)string_length,
	(binaryfunc)string_subscript,
	0,
};
1262
/* Buffer procedure table for strings: the read-buffer, write-buffer,
   segment-count and char-buffer slots, in that order.  The write-buffer
   entry, string_buffer_getwritebuf, always fails with TypeError. */
static PyBufferProcs string_as_buffer = {
	(getreadbufferproc)string_buffer_getreadbuf,
	(getwritebufferproc)string_buffer_getwritebuf,
	(getsegcountproc)string_buffer_getsegcount,
	(getcharbufferproc)string_buffer_getcharbuf,
};
1269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270
1271
/* Indices into stripformat[] below, one per strip variant. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Skip the first three characters ("|O:") of the selected format
   string, leaving just the method name that follows the colon. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001280
/* Append the substring data[left:right] to `list' as a new string.

   The expansion uses names from the calling function: it assigns to a
   local `PyObject *str', appends to a local `list', and jumps to a label
   `onError' if creating the string or appending it fails.  The reference
   held in `str' is released on both paths.

   Wrapped in do { } while (0) so that `SPLIT_APPEND(...);' is exactly
   one statement and stays correct inside an unbraced if/else. */
#define SPLIT_APPEND(data, left, right)					\
	do {								\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)					\
			goto onError;					\
		if (PyList_Append(list, str)) {				\
			Py_DECREF(str);					\
			goto onError;					\
		}							\
		Py_DECREF(str);						\
	} while (0)
1292
/* Insert the substring data[left:right] at the front of `list' as a new
   string.

   The expansion uses names from the calling function: it assigns to a
   local `PyObject *str', inserts into a local `list', and jumps to a
   label `onError' if creating the string or inserting it fails.  The
   reference held in `str' is released on both paths.

   Wrapped in do { } while (0) so that `SPLIT_INSERT(...);' is exactly
   one statement and stays correct inside an unbraced if/else. */
#define SPLIT_INSERT(data, left, right)					\
	do {								\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)					\
			goto onError;					\
		if (PyList_Insert(list, 0, str)) {			\
			Py_DECREF(str);					\
			goto onError;					\
		}							\
		Py_DECREF(str);						\
	} while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304
1305static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001306split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001308 int i, j;
1309 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 PyObject *list = PyList_New(0);
1311
1312 if (list == NULL)
1313 return NULL;
1314
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 for (i = j = 0; i < len; ) {
1316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 while (i < len && !isspace(Py_CHARMASK(s[i])))
1320 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 if (maxsplit-- <= 0)
1323 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001325 while (i < len && isspace(Py_CHARMASK(s[i])))
1326 i++;
1327 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328 }
1329 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001331 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001334 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 Py_DECREF(list);
1336 return NULL;
1337}
1338
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001339static PyObject *
1340split_char(const char *s, int len, char ch, int maxcount)
1341{
1342 register int i, j;
1343 PyObject *str;
1344 PyObject *list = PyList_New(0);
1345
1346 if (list == NULL)
1347 return NULL;
1348
1349 for (i = j = 0; i < len; ) {
1350 if (s[i] == ch) {
1351 if (maxcount-- <= 0)
1352 break;
1353 SPLIT_APPEND(s, j, i);
1354 i = j = i + 1;
1355 } else
1356 i++;
1357 }
1358 if (j <= len) {
1359 SPLIT_APPEND(s, j, len);
1360 }
1361 return list;
1362
1363 onError:
1364 Py_DECREF(list);
1365 return NULL;
1366}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001368PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369"S.split([sep [,maxsplit]]) -> list of strings\n\
1370\n\
1371Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001373splits are done. If sep is not specified or is None, any\n\
1374whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375
1376static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001377string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378{
1379 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001380 int maxsplit = -1;
1381 const char *s = PyString_AS_STRING(self), *sub;
1382 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383
Guido van Rossum4c08d552000-03-10 22:55:18 +00001384 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386 if (maxsplit < 0)
1387 maxsplit = INT_MAX;
1388 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 if (PyString_Check(subobj)) {
1391 sub = PyString_AS_STRING(subobj);
1392 n = PyString_GET_SIZE(subobj);
1393 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001394#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001395 else if (PyUnicode_Check(subobj))
1396 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1399 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001400
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 if (n == 0) {
1402 PyErr_SetString(PyExc_ValueError, "empty separator");
1403 return NULL;
1404 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001405 else if (n == 1)
1406 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407
1408 list = PyList_New(0);
1409 if (list == NULL)
1410 return NULL;
1411
1412 i = j = 0;
1413 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001414 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001415 if (maxsplit-- <= 0)
1416 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1418 if (item == NULL)
1419 goto fail;
1420 err = PyList_Append(list, item);
1421 Py_DECREF(item);
1422 if (err < 0)
1423 goto fail;
1424 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 }
1426 else
1427 i++;
1428 }
1429 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1430 if (item == NULL)
1431 goto fail;
1432 err = PyList_Append(list, item);
1433 Py_DECREF(item);
1434 if (err < 0)
1435 goto fail;
1436
1437 return list;
1438
1439 fail:
1440 Py_DECREF(list);
1441 return NULL;
1442}
1443
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001444static PyObject *
1445rsplit_whitespace(const char *s, int len, int maxsplit)
1446{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447 int i, j;
1448 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001449 PyObject *list = PyList_New(0);
1450
1451 if (list == NULL)
1452 return NULL;
1453
1454 for (i = j = len - 1; i >= 0; ) {
1455 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1456 i--;
1457 j = i;
1458 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1459 i--;
1460 if (j > i) {
1461 if (maxsplit-- <= 0)
1462 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001464 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1465 i--;
1466 j = i;
1467 }
1468 }
1469 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001470 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001471 }
1472 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001473 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001474 Py_DECREF(list);
1475 return NULL;
1476}
1477
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001478static PyObject *
1479rsplit_char(const char *s, int len, char ch, int maxcount)
1480{
1481 register int i, j;
1482 PyObject *str;
1483 PyObject *list = PyList_New(0);
1484
1485 if (list == NULL)
1486 return NULL;
1487
1488 for (i = j = len - 1; i >= 0; ) {
1489 if (s[i] == ch) {
1490 if (maxcount-- <= 0)
1491 break;
1492 SPLIT_INSERT(s, i + 1, j + 1);
1493 j = i = i - 1;
1494 } else
1495 i--;
1496 }
1497 if (j >= -1) {
1498 SPLIT_INSERT(s, 0, j + 1);
1499 }
1500 return list;
1501
1502 onError:
1503 Py_DECREF(list);
1504 return NULL;
1505}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001506
1507PyDoc_STRVAR(rsplit__doc__,
1508"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1509\n\
1510Return a list of the words in the string S, using sep as the\n\
1511delimiter string, starting at the end of the string and working\n\
1512to the front. If maxsplit is given, at most maxsplit splits are\n\
1513done. If sep is not specified or is None, any whitespace string\n\
1514is a separator.");
1515
1516static PyObject *
1517string_rsplit(PyStringObject *self, PyObject *args)
1518{
1519 int len = PyString_GET_SIZE(self), n, i, j, err;
1520 int maxsplit = -1;
1521 const char *s = PyString_AS_STRING(self), *sub;
1522 PyObject *list, *item, *subobj = Py_None;
1523
1524 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1525 return NULL;
1526 if (maxsplit < 0)
1527 maxsplit = INT_MAX;
1528 if (subobj == Py_None)
1529 return rsplit_whitespace(s, len, maxsplit);
1530 if (PyString_Check(subobj)) {
1531 sub = PyString_AS_STRING(subobj);
1532 n = PyString_GET_SIZE(subobj);
1533 }
1534#ifdef Py_USING_UNICODE
1535 else if (PyUnicode_Check(subobj))
1536 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1537#endif
1538 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1539 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001540
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001541 if (n == 0) {
1542 PyErr_SetString(PyExc_ValueError, "empty separator");
1543 return NULL;
1544 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001545 else if (n == 1)
1546 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001547
1548 list = PyList_New(0);
1549 if (list == NULL)
1550 return NULL;
1551
1552 j = len;
1553 i = j - n;
1554 while (i >= 0) {
1555 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1556 if (maxsplit-- <= 0)
1557 break;
1558 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1559 if (item == NULL)
1560 goto fail;
1561 err = PyList_Insert(list, 0, item);
1562 Py_DECREF(item);
1563 if (err < 0)
1564 goto fail;
1565 j = i;
1566 i -= n;
1567 }
1568 else
1569 i--;
1570 }
1571 item = PyString_FromStringAndSize(s, j);
1572 if (item == NULL)
1573 goto fail;
1574 err = PyList_Insert(list, 0, item);
1575 Py_DECREF(item);
1576 if (err < 0)
1577 goto fail;
1578
1579 return list;
1580
1581 fail:
1582 Py_DECREF(list);
1583 return NULL;
1584}
1585
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001587PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588"S.join(sequence) -> string\n\
1589\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001590Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592
1593static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001594string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595{
1596 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001597 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 char *p;
1600 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001601 size_t sz = 0;
1602 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001603 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
Tim Peters19fe14e2001-01-19 03:03:47 +00001605 seq = PySequence_Fast(orig, "");
1606 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001607 if (PyErr_ExceptionMatches(PyExc_TypeError))
1608 PyErr_Format(PyExc_TypeError,
1609 "sequence expected, %.80s found",
1610 orig->ob_type->tp_name);
1611 return NULL;
1612 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001613
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001614 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001615 if (seqlen == 0) {
1616 Py_DECREF(seq);
1617 return PyString_FromString("");
1618 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001620 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001621 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1622 PyErr_Format(PyExc_TypeError,
1623 "sequence item 0: expected string,"
1624 " %.80s found",
1625 item->ob_type->tp_name);
1626 Py_DECREF(seq);
1627 return NULL;
1628 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001629 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001630 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001633
Tim Peters19fe14e2001-01-19 03:03:47 +00001634 /* There are at least two things to join. Do a pre-pass to figure out
1635 * the total amount of space we'll need (sz), see whether any argument
1636 * is absurd, and defer to the Unicode join if appropriate.
1637 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001640 item = PySequence_Fast_GET_ITEM(seq, i);
1641 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001642#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001643 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001644 /* Defer to Unicode join.
1645 * CAUTION: There's no gurantee that the
1646 * original sequence can be iterated over
1647 * again, so we must pass seq here.
1648 */
1649 PyObject *result;
1650 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001651 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001652 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001653 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001654#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001656 "sequence item %i: expected string,"
1657 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001658 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001659 Py_DECREF(seq);
1660 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001661 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 sz += PyString_GET_SIZE(item);
1663 if (i != 0)
1664 sz += seplen;
1665 if (sz < old_sz || sz > INT_MAX) {
1666 PyErr_SetString(PyExc_OverflowError,
1667 "join() is too long for a Python string");
1668 Py_DECREF(seq);
1669 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001671 }
1672
1673 /* Allocate result space. */
1674 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1675 if (res == NULL) {
1676 Py_DECREF(seq);
1677 return NULL;
1678 }
1679
1680 /* Catenate everything. */
1681 p = PyString_AS_STRING(res);
1682 for (i = 0; i < seqlen; ++i) {
1683 size_t n;
1684 item = PySequence_Fast_GET_ITEM(seq, i);
1685 n = PyString_GET_SIZE(item);
1686 memcpy(p, PyString_AS_STRING(item), n);
1687 p += n;
1688 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001689 memcpy(p, sep, seplen);
1690 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001691 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001693
Jeremy Hylton49048292000-07-11 03:28:17 +00001694 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696}
1697
Tim Peters52e155e2001-06-16 05:42:57 +00001698PyObject *
1699_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001700{
Tim Petersa7259592001-06-16 05:11:17 +00001701 assert(sep != NULL && PyString_Check(sep));
1702 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001703 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001704}
1705
/* Normalize a (start, end) pair in place against a string of length len.
 *
 * Negative values are taken relative to len and then floored at 0; end is
 * additionally capped at len.  start is NOT capped at len.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int lo = *start;
    int hi = *end;

    if (hi > len)
        hi = len;
    else if (hi < 0)
        hi += len;
    if (hi < 0)
        hi = 0;

    if (lo < 0) {
        lo += len;
        if (lo < 0)
            lo = 0;
    }

    *end = hi;
    *start = lo;
}
1720
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721static long
Fred Drakeba096332000-07-09 07:04:36 +00001722string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001724 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725 int len = PyString_GET_SIZE(self);
1726 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001727 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001729 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001730 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731 return -2;
1732 if (PyString_Check(subobj)) {
1733 sub = PyString_AS_STRING(subobj);
1734 n = PyString_GET_SIZE(subobj);
1735 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001736#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001738 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001739#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001740 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 return -2;
1742
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001743 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 if (dir > 0) {
1746 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 last -= n;
1749 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001750 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001751 return (long)i;
1752 }
1753 else {
1754 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001755
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 if (n == 0 && i <= last)
1757 return (long)last;
1758 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001759 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 return (long)j;
1761 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001762
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 return -1;
1764}
1765
1766
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001767PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768"S.find(sub [,start [,end]]) -> int\n\
1769\n\
1770Return the lowest index in S where substring sub is found,\n\
1771such that sub is contained within s[start,end]. Optional\n\
1772arguments start and end are interpreted as in slice notation.\n\
1773\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001774Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775
1776static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001777string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001779 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780 if (result == -2)
1781 return NULL;
1782 return PyInt_FromLong(result);
1783}
1784
1785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787"S.index(sub [,start [,end]]) -> int\n\
1788\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001789Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790
1791static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001792string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001794 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 if (result == -2)
1796 return NULL;
1797 if (result == -1) {
1798 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001799 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800 return NULL;
1801 }
1802 return PyInt_FromLong(result);
1803}
1804
1805
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001806PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807"S.rfind(sub [,start [,end]]) -> int\n\
1808\n\
1809Return the highest index in S where substring sub is found,\n\
1810such that sub is contained within s[start,end]. Optional\n\
1811arguments start and end are interpreted as in slice notation.\n\
1812\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001813Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814
1815static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001816string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001818 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 if (result == -2)
1820 return NULL;
1821 return PyInt_FromLong(result);
1822}
1823
1824
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001825PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826"S.rindex(sub [,start [,end]]) -> int\n\
1827\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001828Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829
1830static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001831string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001833 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834 if (result == -2)
1835 return NULL;
1836 if (result == -1) {
1837 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001838 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 return NULL;
1840 }
1841 return PyInt_FromLong(result);
1842}
1843
1844
1845static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001846do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1847{
1848 char *s = PyString_AS_STRING(self);
1849 int len = PyString_GET_SIZE(self);
1850 char *sep = PyString_AS_STRING(sepobj);
1851 int seplen = PyString_GET_SIZE(sepobj);
1852 int i, j;
1853
1854 i = 0;
1855 if (striptype != RIGHTSTRIP) {
1856 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1857 i++;
1858 }
1859 }
1860
1861 j = len;
1862 if (striptype != LEFTSTRIP) {
1863 do {
1864 j--;
1865 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1866 j++;
1867 }
1868
1869 if (i == 0 && j == len && PyString_CheckExact(self)) {
1870 Py_INCREF(self);
1871 return (PyObject*)self;
1872 }
1873 else
1874 return PyString_FromStringAndSize(s+i, j-i);
1875}
1876
1877
1878static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001879do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880{
1881 char *s = PyString_AS_STRING(self);
1882 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884 i = 0;
1885 if (striptype != RIGHTSTRIP) {
1886 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1887 i++;
1888 }
1889 }
1890
1891 j = len;
1892 if (striptype != LEFTSTRIP) {
1893 do {
1894 j--;
1895 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1896 j++;
1897 }
1898
Tim Peters8fa5dd02001-09-12 02:18:30 +00001899 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 Py_INCREF(self);
1901 return (PyObject*)self;
1902 }
1903 else
1904 return PyString_FromStringAndSize(s+i, j-i);
1905}
1906
1907
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001908static PyObject *
1909do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1910{
1911 PyObject *sep = NULL;
1912
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001913 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001914 return NULL;
1915
1916 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001917 if (PyString_Check(sep))
1918 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001919#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001920 else if (PyUnicode_Check(sep)) {
1921 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1922 PyObject *res;
1923 if (uniself==NULL)
1924 return NULL;
1925 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1926 striptype, sep);
1927 Py_DECREF(uniself);
1928 return res;
1929 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001930#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001931 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001932 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001933#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001934 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001935#else
1936 "%s arg must be None or str",
1937#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001938 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001939 return NULL;
1940 }
1941 return do_xstrip(self, striptype, sep);
1942 }
1943
1944 return do_strip(self, striptype);
1945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001949"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950\n\
1951Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001952whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001953If chars is given and not None, remove characters in chars instead.\n\
1954If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955
1956static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001957string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001959 if (PyTuple_GET_SIZE(args) == 0)
1960 return do_strip(self, BOTHSTRIP); /* Common case */
1961 else
1962 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963}
1964
1965
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001966PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001967"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001970If chars is given and not None, remove characters in chars instead.\n\
1971If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972
1973static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001974string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976 if (PyTuple_GET_SIZE(args) == 0)
1977 return do_strip(self, LEFTSTRIP); /* Common case */
1978 else
1979 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980}
1981
1982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001983PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001984"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001987If chars is given and not None, remove characters in chars instead.\n\
1988If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989
1990static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001993 if (PyTuple_GET_SIZE(args) == 0)
1994 return do_strip(self, RIGHTSTRIP); /* Common case */
1995 else
1996 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997}
1998
1999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002000PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001"S.lower() -> string\n\
2002\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
2005static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002006string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007{
2008 char *s = PyString_AS_STRING(self), *s_new;
2009 int i, n = PyString_GET_SIZE(self);
2010 PyObject *new;
2011
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 new = PyString_FromStringAndSize(NULL, n);
2013 if (new == NULL)
2014 return NULL;
2015 s_new = PyString_AsString(new);
2016 for (i = 0; i < n; i++) {
2017 int c = Py_CHARMASK(*s++);
2018 if (isupper(c)) {
2019 *s_new = tolower(c);
2020 } else
2021 *s_new = c;
2022 s_new++;
2023 }
2024 return new;
2025}
2026
2027
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002028PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029"S.upper() -> string\n\
2030\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002031Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032
2033static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002034string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035{
2036 char *s = PyString_AS_STRING(self), *s_new;
2037 int i, n = PyString_GET_SIZE(self);
2038 PyObject *new;
2039
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 new = PyString_FromStringAndSize(NULL, n);
2041 if (new == NULL)
2042 return NULL;
2043 s_new = PyString_AsString(new);
2044 for (i = 0; i < n; i++) {
2045 int c = Py_CHARMASK(*s++);
2046 if (islower(c)) {
2047 *s_new = toupper(c);
2048 } else
2049 *s_new = c;
2050 s_new++;
2051 }
2052 return new;
2053}
2054
2055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002056PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057"S.title() -> string\n\
2058\n\
2059Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061
2062static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002063string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002064{
2065 char *s = PyString_AS_STRING(self), *s_new;
2066 int i, n = PyString_GET_SIZE(self);
2067 int previous_is_cased = 0;
2068 PyObject *new;
2069
Guido van Rossum4c08d552000-03-10 22:55:18 +00002070 new = PyString_FromStringAndSize(NULL, n);
2071 if (new == NULL)
2072 return NULL;
2073 s_new = PyString_AsString(new);
2074 for (i = 0; i < n; i++) {
2075 int c = Py_CHARMASK(*s++);
2076 if (islower(c)) {
2077 if (!previous_is_cased)
2078 c = toupper(c);
2079 previous_is_cased = 1;
2080 } else if (isupper(c)) {
2081 if (previous_is_cased)
2082 c = tolower(c);
2083 previous_is_cased = 1;
2084 } else
2085 previous_is_cased = 0;
2086 *s_new++ = c;
2087 }
2088 return new;
2089}
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092"S.capitalize() -> string\n\
2093\n\
2094Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002095capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096
2097static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002098string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099{
2100 char *s = PyString_AS_STRING(self), *s_new;
2101 int i, n = PyString_GET_SIZE(self);
2102 PyObject *new;
2103
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104 new = PyString_FromStringAndSize(NULL, n);
2105 if (new == NULL)
2106 return NULL;
2107 s_new = PyString_AsString(new);
2108 if (0 < n) {
2109 int c = Py_CHARMASK(*s++);
2110 if (islower(c))
2111 *s_new = toupper(c);
2112 else
2113 *s_new = c;
2114 s_new++;
2115 }
2116 for (i = 1; i < n; i++) {
2117 int c = Py_CHARMASK(*s++);
2118 if (isupper(c))
2119 *s_new = tolower(c);
2120 else
2121 *s_new = c;
2122 s_new++;
2123 }
2124 return new;
2125}
2126
2127
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002128PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129"S.count(sub[, start[, end]]) -> int\n\
2130\n\
2131Return the number of occurrences of substring sub in string\n\
2132S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002133interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134
2135static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002136string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 int len = PyString_GET_SIZE(self), n;
2140 int i = 0, last = INT_MAX;
2141 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143
Guido van Rossumc6821402000-05-08 14:08:05 +00002144 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2145 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002147
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148 if (PyString_Check(subobj)) {
2149 sub = PyString_AS_STRING(subobj);
2150 n = PyString_GET_SIZE(subobj);
2151 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002152#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002153 else if (PyUnicode_Check(subobj)) {
2154 int count;
2155 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2156 if (count == -1)
2157 return NULL;
2158 else
2159 return PyInt_FromLong((long) count);
2160 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002161#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002162 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2163 return NULL;
2164
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002165 string_adjust_indices(&i, &last, len);
2166
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 m = last + 1 - n;
2168 if (n == 0)
2169 return PyInt_FromLong((long) (m-i));
2170
2171 r = 0;
2172 while (i < m) {
2173 if (!memcmp(s+i, sub, n)) {
2174 r++;
2175 i += n;
2176 } else {
2177 i++;
2178 }
2179 }
2180 return PyInt_FromLong((long) r);
2181}
2182
2183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002184PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185"S.swapcase() -> string\n\
2186\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002188converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189
2190static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002191string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192{
2193 char *s = PyString_AS_STRING(self), *s_new;
2194 int i, n = PyString_GET_SIZE(self);
2195 PyObject *new;
2196
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 new = PyString_FromStringAndSize(NULL, n);
2198 if (new == NULL)
2199 return NULL;
2200 s_new = PyString_AsString(new);
2201 for (i = 0; i < n; i++) {
2202 int c = Py_CHARMASK(*s++);
2203 if (islower(c)) {
2204 *s_new = toupper(c);
2205 }
2206 else if (isupper(c)) {
2207 *s_new = tolower(c);
2208 }
2209 else
2210 *s_new = c;
2211 s_new++;
2212 }
2213 return new;
2214}
2215
2216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218"S.translate(table [,deletechars]) -> string\n\
2219\n\
2220Return a copy of the string S, where all characters occurring\n\
2221in the optional argument deletechars are removed, and the\n\
2222remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002223translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224
2225static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002226string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 register char *input, *output;
2229 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 register int i, c, changed = 0;
2231 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 int inlen, tablen, dellen = 0;
2234 PyObject *result;
2235 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002238 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241
2242 if (PyString_Check(tableobj)) {
2243 table1 = PyString_AS_STRING(tableobj);
2244 tablen = PyString_GET_SIZE(tableobj);
2245 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002246#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002248 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 parameter; instead a mapping to None will cause characters
2250 to be deleted. */
2251 if (delobj != NULL) {
2252 PyErr_SetString(PyExc_TypeError,
2253 "deletions are implemented differently for unicode");
2254 return NULL;
2255 }
2256 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2257 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002258#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261
Martin v. Löwis00b61272002-12-12 20:03:19 +00002262 if (tablen != 256) {
2263 PyErr_SetString(PyExc_ValueError,
2264 "translation table must be 256 characters long");
2265 return NULL;
2266 }
2267
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268 if (delobj != NULL) {
2269 if (PyString_Check(delobj)) {
2270 del_table = PyString_AS_STRING(delobj);
2271 dellen = PyString_GET_SIZE(delobj);
2272 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002273#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 else if (PyUnicode_Check(delobj)) {
2275 PyErr_SetString(PyExc_TypeError,
2276 "deletions are implemented differently for unicode");
2277 return NULL;
2278 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002279#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002280 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2281 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282 }
2283 else {
2284 del_table = NULL;
2285 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286 }
2287
2288 table = table1;
2289 inlen = PyString_Size(input_obj);
2290 result = PyString_FromStringAndSize((char *)NULL, inlen);
2291 if (result == NULL)
2292 return NULL;
2293 output_start = output = PyString_AsString(result);
2294 input = PyString_AsString(input_obj);
2295
2296 if (dellen == 0) {
2297 /* If no deletions are required, use faster code */
2298 for (i = inlen; --i >= 0; ) {
2299 c = Py_CHARMASK(*input++);
2300 if (Py_CHARMASK((*output++ = table[c])) != c)
2301 changed = 1;
2302 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002303 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304 return result;
2305 Py_DECREF(result);
2306 Py_INCREF(input_obj);
2307 return input_obj;
2308 }
2309
2310 for (i = 0; i < 256; i++)
2311 trans_table[i] = Py_CHARMASK(table[i]);
2312
2313 for (i = 0; i < dellen; i++)
2314 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2315
2316 for (i = inlen; --i >= 0; ) {
2317 c = Py_CHARMASK(*input++);
2318 if (trans_table[c] != -1)
2319 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2320 continue;
2321 changed = 1;
2322 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002323 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324 Py_DECREF(result);
2325 Py_INCREF(input_obj);
2326 return input_obj;
2327 }
2328 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002329 if (inlen > 0)
2330 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331 return result;
2332}
2333
2334
2335/* What follows is used for implementing replace(). Perry Stoll. */
2336
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none (which is always the case when PAT_LEN exceeds LEN).
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start within the last pat_len-1 bytes of MEM. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
2362
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right with mymemfind():
     mem=1111  and pat=11 gives 2;
     mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int nfound;

    for (nfound = 0; len >= 0; nfound++) {
        int consumed;
        const int offset = mymemfind(mem, len, pat, pat_len);

        if (offset == -1)
            break;
        /* Resume the scan just past the match that was found. */
        consumed = offset + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return nfound;
}
2386
2387/*
2388 mymemreplace
2389
Thomas Wouters7e474022000-07-16 12:04:32 +00002390 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391 replaced with SUB.
2392
   If length of PAT is greater than length of STR or there are no occurrences
   of PAT in STR, then the original string is returned.  Otherwise, a new
   string is allocated here and returned.
2396
2397 on return, out_len is:
2398 the length of output string, or
2399 -1 if the input string is returned, or
2400 unchanged if an error occurs (no memory).
2401
2402 return value is:
2403 the new string allocated locally, or
2404 NULL if an error occurred.
2405*/
2406static char *
Tim Petersc2e7da92000-07-09 08:02:21 +00002407mymemreplace(const char *str, int len, /* input string */
2408 const char *pat, int pat_len, /* pattern string to find */
2409 const char *sub, int sub_len, /* substitution string */
2410 int count, /* number of replacements */
Tim Peters4cd44ef2001-05-10 00:05:33 +00002411 int *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412{
2413 char *out_s;
2414 char *new_s;
2415 int nfound, offset, new_len;
2416
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002417 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 goto return_same;
2419
2420 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002421 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002422 if (count < 0)
2423 count = INT_MAX;
2424 else if (nfound > count)
2425 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 if (nfound == 0)
2427 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002428
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002430 if (new_len == 0) {
2431 /* Have to allocate something for the caller to free(). */
2432 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002433 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002434 return NULL;
2435 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002437 else {
2438 assert(new_len > 0);
2439 new_s = (char *)PyMem_MALLOC(new_len);
2440 if (new_s == NULL)
2441 return NULL;
2442 out_s = new_s;
2443
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002444 if (pat_len > 0) {
2445 for (; nfound > 0; --nfound) {
2446 /* find index of next instance of pattern */
2447 offset = mymemfind(str, len, pat, pat_len);
2448 if (offset == -1)
2449 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002450
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002451 /* copy non matching part of input string */
2452 memcpy(new_s, str, offset);
2453 str += offset + pat_len;
2454 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002455
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002456 /* copy substitute into the output string */
2457 new_s += offset;
2458 memcpy(new_s, sub, sub_len);
2459 new_s += sub_len;
2460 }
2461 /* copy any remaining values into output string */
2462 if (len > 0)
2463 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002464 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002465 else {
2466 for (;;++str, --len) {
2467 memcpy(new_s, sub, sub_len);
2468 new_s += sub_len;
2469 if (--nfound <= 0) {
2470 memcpy(new_s, str, len);
2471 break;
2472 }
2473 *new_s++ = *str;
2474 }
2475 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002476 }
2477 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478 return out_s;
2479
2480 return_same:
2481 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002482 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483}
2484
2485
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002486PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002487"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488\n\
2489Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002490old replaced by new. If the optional argument count is\n\
2491given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492
2493static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002494string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002495{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496 const char *str = PyString_AS_STRING(self), *sub, *repl;
2497 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002498 const int len = PyString_GET_SIZE(self);
2499 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002500 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002502 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002503
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504 if (!PyArg_ParseTuple(args, "OO|i:replace",
2505 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002507
2508 if (PyString_Check(subobj)) {
2509 sub = PyString_AS_STRING(subobj);
2510 sub_len = PyString_GET_SIZE(subobj);
2511 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002512#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002514 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002516#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2518 return NULL;
2519
2520 if (PyString_Check(replobj)) {
2521 repl = PyString_AS_STRING(replobj);
2522 repl_len = PyString_GET_SIZE(replobj);
2523 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002524#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002526 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002528#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2530 return NULL;
2531
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002533 if (new_s == NULL) {
2534 PyErr_NoMemory();
2535 return NULL;
2536 }
2537 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002538 if (PyString_CheckExact(self)) {
2539 /* we're returning another reference to self */
2540 new = (PyObject*)self;
2541 Py_INCREF(new);
2542 }
2543 else {
2544 new = PyString_FromStringAndSize(str, len);
2545 if (new == NULL)
2546 return NULL;
2547 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002548 }
2549 else {
2550 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002551 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552 }
2553 return new;
2554}
2555
2556
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002557PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002558"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002559\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002560Return True if S starts with the specified prefix, False otherwise.\n\
2561With optional start, test S beginning at that position.\n\
2562With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563
2564static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002565string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002568 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570 int plen;
2571 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002572 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002574
Guido van Rossumc6821402000-05-08 14:08:05 +00002575 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2576 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002577 return NULL;
2578 if (PyString_Check(subobj)) {
2579 prefix = PyString_AS_STRING(subobj);
2580 plen = PyString_GET_SIZE(subobj);
2581 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002582#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002583 else if (PyUnicode_Check(subobj)) {
2584 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002585 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002586 subobj, start, end, -1);
2587 if (rc == -1)
2588 return NULL;
2589 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002590 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002591 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002592#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002594 return NULL;
2595
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002596 string_adjust_indices(&start, &end, len);
2597
2598 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002599 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002600
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002601 if (end-start >= plen)
2602 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2603 else
2604 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605}
2606
2607
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002608PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002609"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002611Return True if S ends with the specified suffix, False otherwise.\n\
2612With optional start, test S beginning at that position.\n\
2613With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614
2615static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002616string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002618 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 const char* suffix;
2621 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002622 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002623 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002625
Guido van Rossumc6821402000-05-08 14:08:05 +00002626 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2627 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 return NULL;
2629 if (PyString_Check(subobj)) {
2630 suffix = PyString_AS_STRING(subobj);
2631 slen = PyString_GET_SIZE(subobj);
2632 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002633#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002634 else if (PyUnicode_Check(subobj)) {
2635 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002636 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002637 subobj, start, end, +1);
2638 if (rc == -1)
2639 return NULL;
2640 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002641 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002642 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002643#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002644 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002645 return NULL;
2646
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002647 string_adjust_indices(&start, &end, len);
2648
2649 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002650 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002652 if (end-slen > start)
2653 start = end - slen;
2654 if (end-start >= slen)
2655 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2656 else
2657 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658}
2659
2660
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002661PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002662"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002663\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002664Encodes S using the codec registered for encoding. encoding defaults\n\
2665to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002666handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002667a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2668'xmlcharrefreplace' as well as any other name registered with\n\
2669codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002670
2671static PyObject *
2672string_encode(PyStringObject *self, PyObject *args)
2673{
2674 char *encoding = NULL;
2675 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002676 PyObject *v;
2677
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002678 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2679 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002680 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002681 if (v == NULL)
2682 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002683 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2684 PyErr_Format(PyExc_TypeError,
2685 "encoder did not return a string/unicode object "
2686 "(type=%.400s)",
2687 v->ob_type->tp_name);
2688 Py_DECREF(v);
2689 return NULL;
2690 }
2691 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002692
2693 onError:
2694 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002695}
2696
2697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002698PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002699"S.decode([encoding[,errors]]) -> object\n\
2700\n\
2701Decodes S using the codec registered for encoding. encoding defaults\n\
2702to the default encoding. errors may be given to set a different error\n\
2703handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002704a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2705as well as any other name registerd with codecs.register_error that is\n\
2706able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002707
2708static PyObject *
2709string_decode(PyStringObject *self, PyObject *args)
2710{
2711 char *encoding = NULL;
2712 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002713 PyObject *v;
2714
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002715 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2716 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002717 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002718 if (v == NULL)
2719 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002720 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2721 PyErr_Format(PyExc_TypeError,
2722 "decoder did not return a string/unicode object "
2723 "(type=%.400s)",
2724 v->ob_type->tp_name);
2725 Py_DECREF(v);
2726 return NULL;
2727 }
2728 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002729
2730 onError:
2731 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002732}
2733
2734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002736"S.expandtabs([tabsize]) -> string\n\
2737\n\
2738Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002739If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002740
2741static PyObject*
2742string_expandtabs(PyStringObject *self, PyObject *args)
2743{
2744 const char *e, *p;
2745 char *q;
2746 int i, j;
2747 PyObject *u;
2748 int tabsize = 8;
2749
2750 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2751 return NULL;
2752
Thomas Wouters7e474022000-07-16 12:04:32 +00002753 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002754 i = j = 0;
2755 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2756 for (p = PyString_AS_STRING(self); p < e; p++)
2757 if (*p == '\t') {
2758 if (tabsize > 0)
2759 j += tabsize - (j % tabsize);
2760 }
2761 else {
2762 j++;
2763 if (*p == '\n' || *p == '\r') {
2764 i += j;
2765 j = 0;
2766 }
2767 }
2768
2769 /* Second pass: create output string and fill it */
2770 u = PyString_FromStringAndSize(NULL, i + j);
2771 if (!u)
2772 return NULL;
2773
2774 j = 0;
2775 q = PyString_AS_STRING(u);
2776
2777 for (p = PyString_AS_STRING(self); p < e; p++)
2778 if (*p == '\t') {
2779 if (tabsize > 0) {
2780 i = tabsize - (j % tabsize);
2781 j += i;
2782 while (i--)
2783 *q++ = ' ';
2784 }
2785 }
2786 else {
2787 j++;
2788 *q++ = *p;
2789 if (*p == '\n' || *p == '\r')
2790 j = 0;
2791 }
2792
2793 return u;
2794}
2795
Tim Peters8fa5dd02001-09-12 02:18:30 +00002796static PyObject *
2797pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002798{
2799 PyObject *u;
2800
2801 if (left < 0)
2802 left = 0;
2803 if (right < 0)
2804 right = 0;
2805
Tim Peters8fa5dd02001-09-12 02:18:30 +00002806 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002807 Py_INCREF(self);
2808 return (PyObject *)self;
2809 }
2810
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002811 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002812 left + PyString_GET_SIZE(self) + right);
2813 if (u) {
2814 if (left)
2815 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002816 memcpy(PyString_AS_STRING(u) + left,
2817 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002818 PyString_GET_SIZE(self));
2819 if (right)
2820 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2821 fill, right);
2822 }
2823
2824 return u;
2825}
2826
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002827PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002828"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002829"\n"
2830"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002831"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002832
2833static PyObject *
2834string_ljust(PyStringObject *self, PyObject *args)
2835{
2836 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002837 char fillchar = ' ';
2838
2839 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840 return NULL;
2841
Tim Peters8fa5dd02001-09-12 02:18:30 +00002842 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002843 Py_INCREF(self);
2844 return (PyObject*) self;
2845 }
2846
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002847 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002848}
2849
2850
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002851PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002852"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002853"\n"
2854"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002855"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002856
2857static PyObject *
2858string_rjust(PyStringObject *self, PyObject *args)
2859{
2860 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002861 char fillchar = ' ';
2862
2863 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002864 return NULL;
2865
Tim Peters8fa5dd02001-09-12 02:18:30 +00002866 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867 Py_INCREF(self);
2868 return (PyObject*) self;
2869 }
2870
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002871 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872}
2873
2874
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002875PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002876"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002877"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002878"Return S centered in a string of length width. Padding is\n"
2879"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002880
2881static PyObject *
2882string_center(PyStringObject *self, PyObject *args)
2883{
2884 int marg, left;
2885 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002886 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002887
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002888 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002889 return NULL;
2890
Tim Peters8fa5dd02001-09-12 02:18:30 +00002891 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002892 Py_INCREF(self);
2893 return (PyObject*) self;
2894 }
2895
2896 marg = width - PyString_GET_SIZE(self);
2897 left = marg / 2 + (marg & width & 1);
2898
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002899 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900}
2901
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002902PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002903"S.zfill(width) -> string\n"
2904"\n"
2905"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002906"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002907
2908static PyObject *
2909string_zfill(PyStringObject *self, PyObject *args)
2910{
2911 int fill;
2912 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002913 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002914
2915 int width;
2916 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2917 return NULL;
2918
2919 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002920 if (PyString_CheckExact(self)) {
2921 Py_INCREF(self);
2922 return (PyObject*) self;
2923 }
2924 else
2925 return PyString_FromStringAndSize(
2926 PyString_AS_STRING(self),
2927 PyString_GET_SIZE(self)
2928 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002929 }
2930
2931 fill = width - PyString_GET_SIZE(self);
2932
2933 s = pad(self, fill, 0, '0');
2934
2935 if (s == NULL)
2936 return NULL;
2937
2938 p = PyString_AS_STRING(s);
2939 if (p[fill] == '+' || p[fill] == '-') {
2940 /* move sign to beginning of string */
2941 p[0] = p[fill];
2942 p[fill] = '0';
2943 }
2944
2945 return (PyObject*) s;
2946}
2947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002948PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002949"S.isspace() -> bool\n\
2950\n\
2951Return True if all characters in S are whitespace\n\
2952and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002953
2954static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002955string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002956{
Fred Drakeba096332000-07-09 07:04:36 +00002957 register const unsigned char *p
2958 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002959 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002960
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961 /* Shortcut for single character strings */
2962 if (PyString_GET_SIZE(self) == 1 &&
2963 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002964 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002965
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002966 /* Special case for empty strings */
2967 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002968 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002969
Guido van Rossum4c08d552000-03-10 22:55:18 +00002970 e = p + PyString_GET_SIZE(self);
2971 for (; p < e; p++) {
2972 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002973 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002974 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002975 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002976}
2977
2978
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002979PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002980"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002981\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002982Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002983and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002984
2985static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002986string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002987{
Fred Drakeba096332000-07-09 07:04:36 +00002988 register const unsigned char *p
2989 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002990 register const unsigned char *e;
2991
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002992 /* Shortcut for single character strings */
2993 if (PyString_GET_SIZE(self) == 1 &&
2994 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002995 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002996
2997 /* Special case for empty strings */
2998 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002999 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003000
3001 e = p + PyString_GET_SIZE(self);
3002 for (; p < e; p++) {
3003 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003004 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003005 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003006 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003007}
3008
3009
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003010PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003011"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003012\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003013Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003014and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003015
3016static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003017string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018{
Fred Drakeba096332000-07-09 07:04:36 +00003019 register const unsigned char *p
3020 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003021 register const unsigned char *e;
3022
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003023 /* Shortcut for single character strings */
3024 if (PyString_GET_SIZE(self) == 1 &&
3025 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003026 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003027
3028 /* Special case for empty strings */
3029 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003030 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003031
3032 e = p + PyString_GET_SIZE(self);
3033 for (; p < e; p++) {
3034 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003035 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003036 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003037 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003038}
3039
3040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003041PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003042"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003043\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003044Return True if all characters in S are digits\n\
3045and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003046
3047static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003048string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003049{
Fred Drakeba096332000-07-09 07:04:36 +00003050 register const unsigned char *p
3051 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003052 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003053
Guido van Rossum4c08d552000-03-10 22:55:18 +00003054 /* Shortcut for single character strings */
3055 if (PyString_GET_SIZE(self) == 1 &&
3056 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003057 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003058
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003059 /* Special case for empty strings */
3060 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003061 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003062
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063 e = p + PyString_GET_SIZE(self);
3064 for (; p < e; p++) {
3065 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003066 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003068 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069}
3070
3071
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003072PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003073"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003074\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003075Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003076at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003077
3078static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003079string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080{
Fred Drakeba096332000-07-09 07:04:36 +00003081 register const unsigned char *p
3082 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003083 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084 int cased;
3085
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086 /* Shortcut for single character strings */
3087 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003088 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003089
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003090 /* Special case for empty strings */
3091 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003092 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003093
Guido van Rossum4c08d552000-03-10 22:55:18 +00003094 e = p + PyString_GET_SIZE(self);
3095 cased = 0;
3096 for (; p < e; p++) {
3097 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003098 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003099 else if (!cased && islower(*p))
3100 cased = 1;
3101 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003102 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003103}
3104
3105
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003106PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003107"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003108\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003109Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003110at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111
3112static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003113string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114{
Fred Drakeba096332000-07-09 07:04:36 +00003115 register const unsigned char *p
3116 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003117 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118 int cased;
3119
Guido van Rossum4c08d552000-03-10 22:55:18 +00003120 /* Shortcut for single character strings */
3121 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003122 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003124 /* Special case for empty strings */
3125 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003126 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003127
Guido van Rossum4c08d552000-03-10 22:55:18 +00003128 e = p + PyString_GET_SIZE(self);
3129 cased = 0;
3130 for (; p < e; p++) {
3131 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003132 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133 else if (!cased && isupper(*p))
3134 cased = 1;
3135 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003136 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003137}
3138
3139
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003140PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003141"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003142\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003143Return True if S is a titlecased string and there is at least one\n\
3144character in S, i.e. uppercase characters may only follow uncased\n\
3145characters and lowercase characters only cased ones. Return False\n\
3146otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147
3148static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003149string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150{
Fred Drakeba096332000-07-09 07:04:36 +00003151 register const unsigned char *p
3152 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003153 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 int cased, previous_is_cased;
3155
Guido van Rossum4c08d552000-03-10 22:55:18 +00003156 /* Shortcut for single character strings */
3157 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003158 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003160 /* Special case for empty strings */
3161 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003162 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003163
Guido van Rossum4c08d552000-03-10 22:55:18 +00003164 e = p + PyString_GET_SIZE(self);
3165 cased = 0;
3166 previous_is_cased = 0;
3167 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003168 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169
3170 if (isupper(ch)) {
3171 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003172 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003173 previous_is_cased = 1;
3174 cased = 1;
3175 }
3176 else if (islower(ch)) {
3177 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003178 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003179 previous_is_cased = 1;
3180 cased = 1;
3181 }
3182 else
3183 previous_is_cased = 0;
3184 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003185 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186}
3187
3188
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003189PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003190"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191\n\
3192Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003193Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003194is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196static PyObject*
3197string_splitlines(PyStringObject *self, PyObject *args)
3198{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199 register int i;
3200 register int j;
3201 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003202 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003203 PyObject *list;
3204 PyObject *str;
3205 char *data;
3206
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003207 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 return NULL;
3209
3210 data = PyString_AS_STRING(self);
3211 len = PyString_GET_SIZE(self);
3212
Guido van Rossum4c08d552000-03-10 22:55:18 +00003213 list = PyList_New(0);
3214 if (!list)
3215 goto onError;
3216
3217 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003218 int eol;
3219
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220 /* Find a line and append it */
3221 while (i < len && data[i] != '\n' && data[i] != '\r')
3222 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003223
3224 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003225 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003226 if (i < len) {
3227 if (data[i] == '\r' && i + 1 < len &&
3228 data[i+1] == '\n')
3229 i += 2;
3230 else
3231 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003232 if (keepends)
3233 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003234 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003235 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003236 j = i;
3237 }
3238 if (j < len) {
3239 SPLIT_APPEND(data, j, len);
3240 }
3241
3242 return list;
3243
3244 onError:
3245 Py_DECREF(list);
3246 return NULL;
3247}
3248
3249#undef SPLIT_APPEND
3250
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003251static PyObject *
3252string_getnewargs(PyStringObject *v)
3253{
3254 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3255}
3256
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003257
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003258static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003259string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003260 /* Counterparts of the obsolete stropmodule functions; except
3261 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003262 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3263 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003264 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003265 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3266 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003267 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3268 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3269 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3270 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3271 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3272 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3273 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003274 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3275 capitalize__doc__},
3276 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3277 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3278 endswith__doc__},
3279 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3280 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3281 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3282 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3283 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3284 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3285 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3286 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3287 startswith__doc__},
3288 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3289 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3290 swapcase__doc__},
3291 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3292 translate__doc__},
3293 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3294 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3295 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3296 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3297 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3298 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3299 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3300 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3301 expandtabs__doc__},
3302 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3303 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003304 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003305 {NULL, NULL} /* sentinel */
3306};
3307
Jeremy Hylton938ace62002-07-17 16:30:39 +00003308static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003309str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3310
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003311static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003312string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003313{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003314 PyObject *x = NULL;
3315 static char *kwlist[] = {"object", 0};
3316
Guido van Rossumae960af2001-08-30 03:11:59 +00003317 if (type != &PyString_Type)
3318 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003319 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3320 return NULL;
3321 if (x == NULL)
3322 return PyString_FromString("");
3323 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003324}
3325
Guido van Rossumae960af2001-08-30 03:11:59 +00003326static PyObject *
3327str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3328{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003329 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003330 int n;
3331
3332 assert(PyType_IsSubtype(type, &PyString_Type));
3333 tmp = string_new(&PyString_Type, args, kwds);
3334 if (tmp == NULL)
3335 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003336 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003337 n = PyString_GET_SIZE(tmp);
3338 pnew = type->tp_alloc(type, n);
3339 if (pnew != NULL) {
3340 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003341 ((PyStringObject *)pnew)->ob_shash =
3342 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003343 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003344 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003345 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003346 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003347}
3348
Guido van Rossumcacfc072002-05-24 19:01:59 +00003349static PyObject *
3350basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3351{
3352 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003353 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003354 return NULL;
3355}
3356
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003357static PyObject *
3358string_mod(PyObject *v, PyObject *w)
3359{
3360 if (!PyString_Check(v)) {
3361 Py_INCREF(Py_NotImplemented);
3362 return Py_NotImplemented;
3363 }
3364 return PyString_Format(v, w);
3365}
3366
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003367PyDoc_STRVAR(basestring_doc,
3368"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003369
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003370static PyNumberMethods string_as_number = {
3371 0, /*nb_add*/
3372 0, /*nb_subtract*/
3373 0, /*nb_multiply*/
3374 0, /*nb_divide*/
3375 string_mod, /*nb_remainder*/
3376};
3377
3378
Guido van Rossumcacfc072002-05-24 19:01:59 +00003379PyTypeObject PyBaseString_Type = {
3380 PyObject_HEAD_INIT(&PyType_Type)
3381 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003382 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003383 0,
3384 0,
3385 0, /* tp_dealloc */
3386 0, /* tp_print */
3387 0, /* tp_getattr */
3388 0, /* tp_setattr */
3389 0, /* tp_compare */
3390 0, /* tp_repr */
3391 0, /* tp_as_number */
3392 0, /* tp_as_sequence */
3393 0, /* tp_as_mapping */
3394 0, /* tp_hash */
3395 0, /* tp_call */
3396 0, /* tp_str */
3397 0, /* tp_getattro */
3398 0, /* tp_setattro */
3399 0, /* tp_as_buffer */
3400 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3401 basestring_doc, /* tp_doc */
3402 0, /* tp_traverse */
3403 0, /* tp_clear */
3404 0, /* tp_richcompare */
3405 0, /* tp_weaklistoffset */
3406 0, /* tp_iter */
3407 0, /* tp_iternext */
3408 0, /* tp_methods */
3409 0, /* tp_members */
3410 0, /* tp_getset */
3411 &PyBaseObject_Type, /* tp_base */
3412 0, /* tp_dict */
3413 0, /* tp_descr_get */
3414 0, /* tp_descr_set */
3415 0, /* tp_dictoffset */
3416 0, /* tp_init */
3417 0, /* tp_alloc */
3418 basestring_new, /* tp_new */
3419 0, /* tp_free */
3420};
3421
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003422PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003423"str(object) -> string\n\
3424\n\
3425Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003426If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003427
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003428PyTypeObject PyString_Type = {
3429 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003430 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003431 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003432 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003433 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003434 (destructor)string_dealloc, /* tp_dealloc */
3435 (printfunc)string_print, /* tp_print */
3436 0, /* tp_getattr */
3437 0, /* tp_setattr */
3438 0, /* tp_compare */
3439 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003440 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003441 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003442 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003443 (hashfunc)string_hash, /* tp_hash */
3444 0, /* tp_call */
3445 (reprfunc)string_str, /* tp_str */
3446 PyObject_GenericGetAttr, /* tp_getattro */
3447 0, /* tp_setattro */
3448 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003449 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3450 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003451 string_doc, /* tp_doc */
3452 0, /* tp_traverse */
3453 0, /* tp_clear */
3454 (richcmpfunc)string_richcompare, /* tp_richcompare */
3455 0, /* tp_weaklistoffset */
3456 0, /* tp_iter */
3457 0, /* tp_iternext */
3458 string_methods, /* tp_methods */
3459 0, /* tp_members */
3460 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003461 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003462 0, /* tp_dict */
3463 0, /* tp_descr_get */
3464 0, /* tp_descr_set */
3465 0, /* tp_dictoffset */
3466 0, /* tp_init */
3467 0, /* tp_alloc */
3468 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003469 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003470};
3471
3472void
Fred Drakeba096332000-07-09 07:04:36 +00003473PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003474{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003475 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003476 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003477 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003478 if (w == NULL || !PyString_Check(*pv)) {
3479 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003480 *pv = NULL;
3481 return;
3482 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003483 v = string_concat((PyStringObject *) *pv, w);
3484 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003485 *pv = v;
3486}
3487
Guido van Rossum013142a1994-08-30 08:19:36 +00003488void
Fred Drakeba096332000-07-09 07:04:36 +00003489PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003490{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003491 PyString_Concat(pv, w);
3492 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003493}
3494
3495
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003496/* The following function breaks the notion that strings are immutable:
3497 it changes the size of a string. We get away with this only if there
3498 is only one module referencing the object. You can also think of it
3499 as creating a new string object and destroying the old one, only
3500 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003501 already be known to some other part of the code...
3502 Note that if there's not enough memory to resize the string, the original
3503 string object at *pv is deallocated, *pv is set to NULL, an "out of
3504 memory" exception is set, and -1 is returned. Else (on success) 0 is
3505 returned, and the value in *pv may or may not be the same as on input.
3506 As always, an extra byte is allocated for a trailing \0 byte (newsize
3507 does *not* include that), and a trailing \0 byte is stored.
3508*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003509
3510int
Fred Drakeba096332000-07-09 07:04:36 +00003511_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003512{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003513 register PyObject *v;
3514 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003515 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003516 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003517 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003518 Py_DECREF(v);
3519 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003520 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003521 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003522 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003523 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003524 _Py_ForgetReference(v);
3525 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003526 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003527 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003528 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003529 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003530 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003531 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003532 _Py_NewReference(*pv);
3533 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003534 sv->ob_size = newsize;
3535 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003536 return 0;
3537}
Guido van Rossume5372401993-03-16 12:15:04 +00003538
3539/* Helpers for formatstring */
3540
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003541static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003542getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003543{
3544 int argidx = *p_argidx;
3545 if (argidx < arglen) {
3546 (*p_argidx)++;
3547 if (arglen < 0)
3548 return args;
3549 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003550 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003551 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003552 PyErr_SetString(PyExc_TypeError,
3553 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003554 return NULL;
3555}
3556
/* Format flag bits, one per conversion flag character. */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
3569
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003570static int
Fred Drakeba096332000-07-09 07:04:36 +00003571formatfloat(char *buf, size_t buflen, int flags,
3572 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003573{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003574 /* fmt = '%#.' + `prec` + `type`
3575 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003576 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003577 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003578 x = PyFloat_AsDouble(v);
3579 if (x == -1.0 && PyErr_Occurred()) {
3580 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003581 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003582 }
Guido van Rossume5372401993-03-16 12:15:04 +00003583 if (prec < 0)
3584 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003585 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3586 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003587 /* Worst case length calc to ensure no buffer overrun:
3588
3589 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003590 fmt = %#.<prec>g
3591 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003592 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003593 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003594
3595 'f' formats:
3596 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3597 len = 1 + 50 + 1 + prec = 52 + prec
3598
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003599 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003600 always given), therefore increase the length by one.
3601
3602 */
3603 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3604 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003605 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003606 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003607 return -1;
3608 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003609 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3610 (flags&F_ALT) ? "#" : "",
3611 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003612 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003613 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003614}
3615
Tim Peters38fd5b62000-09-21 05:43:11 +00003616/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3617 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3618 * Python's regular ints.
3619 * Return value: a new PyString*, or NULL if error.
3620 * . *pbuf is set to point into it,
3621 * *plen set to the # of chars following that.
3622 * Caller must decref it when done using pbuf.
3623 * The string starting at *pbuf is of the form
3624 * "-"? ("0x" | "0X")? digit+
3625 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003626 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003627 * There will be at least prec digits, zero-filled on the left if
3628 * necessary to get that many.
3629 * val object to be converted
3630 * flags bitmask of format flags; only F_ALT is looked at
3631 * prec minimum number of digits; 0-fill on left if needed
3632 * type a character in [duoxX]; u acts the same as d
3633 *
3634 * CAUTION: o, x and X conversions on regular ints can never
3635 * produce a '-' sign, but can for Python's unbounded ints.
3636 */
3637PyObject*
3638_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3639 char **pbuf, int *plen)
3640{
3641 PyObject *result = NULL;
3642 char *buf;
3643 int i;
3644 int sign; /* 1 if '-', else 0 */
3645 int len; /* number of characters */
3646 int numdigits; /* len == numnondigits + numdigits */
3647 int numnondigits = 0;
3648
3649 switch (type) {
3650 case 'd':
3651 case 'u':
3652 result = val->ob_type->tp_str(val);
3653 break;
3654 case 'o':
3655 result = val->ob_type->tp_as_number->nb_oct(val);
3656 break;
3657 case 'x':
3658 case 'X':
3659 numnondigits = 2;
3660 result = val->ob_type->tp_as_number->nb_hex(val);
3661 break;
3662 default:
3663 assert(!"'type' not in [duoxX]");
3664 }
3665 if (!result)
3666 return NULL;
3667
3668 /* To modify the string in-place, there can only be one reference. */
3669 if (result->ob_refcnt != 1) {
3670 PyErr_BadInternalCall();
3671 return NULL;
3672 }
3673 buf = PyString_AsString(result);
3674 len = PyString_Size(result);
3675 if (buf[len-1] == 'L') {
3676 --len;
3677 buf[len] = '\0';
3678 }
3679 sign = buf[0] == '-';
3680 numnondigits += sign;
3681 numdigits = len - numnondigits;
3682 assert(numdigits > 0);
3683
Tim Petersfff53252001-04-12 18:38:48 +00003684 /* Get rid of base marker unless F_ALT */
3685 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003686 /* Need to skip 0x, 0X or 0. */
3687 int skipped = 0;
3688 switch (type) {
3689 case 'o':
3690 assert(buf[sign] == '0');
3691 /* If 0 is only digit, leave it alone. */
3692 if (numdigits > 1) {
3693 skipped = 1;
3694 --numdigits;
3695 }
3696 break;
3697 case 'x':
3698 case 'X':
3699 assert(buf[sign] == '0');
3700 assert(buf[sign + 1] == 'x');
3701 skipped = 2;
3702 numnondigits -= 2;
3703 break;
3704 }
3705 if (skipped) {
3706 buf += skipped;
3707 len -= skipped;
3708 if (sign)
3709 buf[0] = '-';
3710 }
3711 assert(len == numnondigits + numdigits);
3712 assert(numdigits > 0);
3713 }
3714
3715 /* Fill with leading zeroes to meet minimum width. */
3716 if (prec > numdigits) {
3717 PyObject *r1 = PyString_FromStringAndSize(NULL,
3718 numnondigits + prec);
3719 char *b1;
3720 if (!r1) {
3721 Py_DECREF(result);
3722 return NULL;
3723 }
3724 b1 = PyString_AS_STRING(r1);
3725 for (i = 0; i < numnondigits; ++i)
3726 *b1++ = *buf++;
3727 for (i = 0; i < prec - numdigits; i++)
3728 *b1++ = '0';
3729 for (i = 0; i < numdigits; i++)
3730 *b1++ = *buf++;
3731 *b1 = '\0';
3732 Py_DECREF(result);
3733 result = r1;
3734 buf = PyString_AS_STRING(result);
3735 len = numnondigits + prec;
3736 }
3737
3738 /* Fix up case for hex conversions. */
3739 switch (type) {
3740 case 'x':
3741 /* Need to convert all upper case letters to lower case. */
3742 for (i = 0; i < len; i++)
3743 if (buf[i] >= 'A' && buf[i] <= 'F')
3744 buf[i] += 'a'-'A';
3745 break;
3746 case 'X':
3747 /* Need to convert 0x to 0X (and -0x to -0X). */
3748 if (buf[sign + 1] == 'x')
3749 buf[sign + 1] = 'X';
3750 break;
3751 }
3752 *pbuf = buf;
3753 *plen = len;
3754 return result;
3755}
3756
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003757static int
Fred Drakeba096332000-07-09 07:04:36 +00003758formatint(char *buf, size_t buflen, int flags,
3759 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003760{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003761 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003762 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3763 + 1 + 1 = 24 */
3764 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003765 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003766 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003767
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003768 x = PyInt_AsLong(v);
3769 if (x == -1 && PyErr_Occurred()) {
3770 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003771 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003772 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003773 if (x < 0 && type == 'u') {
3774 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003775 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003776 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3777 sign = "-";
3778 else
3779 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003780 if (prec < 0)
3781 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003782
3783 if ((flags & F_ALT) &&
3784 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003785 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003786 * of issues that cause pain:
3787 * - when 0 is being converted, the C standard leaves off
3788 * the '0x' or '0X', which is inconsistent with other
3789 * %#x/%#X conversions and inconsistent with Python's
3790 * hex() function
3791 * - there are platforms that violate the standard and
3792 * convert 0 with the '0x' or '0X'
3793 * (Metrowerks, Compaq Tru64)
3794 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003795 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003796 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003797 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003798 * We can achieve the desired consistency by inserting our
3799 * own '0x' or '0X' prefix, and substituting %x/%X in place
3800 * of %#x/%#X.
3801 *
3802 * Note that this is the same approach as used in
3803 * formatint() in unicodeobject.c
3804 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003805 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3806 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003807 }
3808 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003809 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3810 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003811 prec, type);
3812 }
3813
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003814 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3815 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003816 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003817 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003818 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003819 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003820 return -1;
3821 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003822 if (sign[0])
3823 PyOS_snprintf(buf, buflen, fmt, -x);
3824 else
3825 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003826 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003827}
3828
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003829static int
Fred Drakeba096332000-07-09 07:04:36 +00003830formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003831{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003832 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003833 if (PyString_Check(v)) {
3834 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003835 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003836 }
3837 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003838 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003839 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003840 }
3841 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003842 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003843}
3844
Guido van Rossum013142a1994-08-30 08:19:36 +00003845
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The replacement text is fully parenthesized so that the macro behaves
   as a single operand wherever it is used (e.g. as the operand of sizeof
   or next to a higher-precedence operator).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003855
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003856PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003857PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003858{
3859 char *fmt, *res;
3860 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003861 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003862 PyObject *result, *orig_args;
3863#ifdef Py_USING_UNICODE
3864 PyObject *v, *w;
3865#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003866 PyObject *dict = NULL;
3867 if (format == NULL || !PyString_Check(format) || args == NULL) {
3868 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003869 return NULL;
3870 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003871 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003872 fmt = PyString_AS_STRING(format);
3873 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003874 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003875 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003876 if (result == NULL)
3877 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003878 res = PyString_AsString(result);
3879 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003880 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003881 argidx = 0;
3882 }
3883 else {
3884 arglen = -1;
3885 argidx = -2;
3886 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003887 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3888 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003889 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003890 while (--fmtcnt >= 0) {
3891 if (*fmt != '%') {
3892 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003893 rescnt = fmtcnt + 100;
3894 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003895 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003896 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003897 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003898 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003899 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003900 }
3901 *res++ = *fmt++;
3902 }
3903 else {
3904 /* Got a format specifier */
3905 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003906 int width = -1;
3907 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003908 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003909 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003910 PyObject *v = NULL;
3911 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003912 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003913 int sign;
3914 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003915 char formatbuf[FORMATBUFLEN];
3916 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003917#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003918 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003919 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003920#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003921
Guido van Rossumda9c2711996-12-05 21:58:58 +00003922 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003923 if (*fmt == '(') {
3924 char *keystart;
3925 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003926 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003927 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003928
3929 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003930 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003931 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003932 goto error;
3933 }
3934 ++fmt;
3935 --fmtcnt;
3936 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003937 /* Skip over balanced parentheses */
3938 while (pcount > 0 && --fmtcnt >= 0) {
3939 if (*fmt == ')')
3940 --pcount;
3941 else if (*fmt == '(')
3942 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003943 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003944 }
3945 keylen = fmt - keystart - 1;
3946 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003947 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003948 "incomplete format key");
3949 goto error;
3950 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003951 key = PyString_FromStringAndSize(keystart,
3952 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003953 if (key == NULL)
3954 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003955 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003956 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003957 args_owned = 0;
3958 }
3959 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003960 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003961 if (args == NULL) {
3962 goto error;
3963 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003964 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003965 arglen = -1;
3966 argidx = -2;
3967 }
Guido van Rossume5372401993-03-16 12:15:04 +00003968 while (--fmtcnt >= 0) {
3969 switch (c = *fmt++) {
3970 case '-': flags |= F_LJUST; continue;
3971 case '+': flags |= F_SIGN; continue;
3972 case ' ': flags |= F_BLANK; continue;
3973 case '#': flags |= F_ALT; continue;
3974 case '0': flags |= F_ZERO; continue;
3975 }
3976 break;
3977 }
3978 if (c == '*') {
3979 v = getnextarg(args, arglen, &argidx);
3980 if (v == NULL)
3981 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003982 if (!PyInt_Check(v)) {
3983 PyErr_SetString(PyExc_TypeError,
3984 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003985 goto error;
3986 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003987 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003988 if (width < 0) {
3989 flags |= F_LJUST;
3990 width = -width;
3991 }
Guido van Rossume5372401993-03-16 12:15:04 +00003992 if (--fmtcnt >= 0)
3993 c = *fmt++;
3994 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003995 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003996 width = c - '0';
3997 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003998 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003999 if (!isdigit(c))
4000 break;
4001 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004002 PyErr_SetString(
4003 PyExc_ValueError,
4004 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004005 goto error;
4006 }
4007 width = width*10 + (c - '0');
4008 }
4009 }
4010 if (c == '.') {
4011 prec = 0;
4012 if (--fmtcnt >= 0)
4013 c = *fmt++;
4014 if (c == '*') {
4015 v = getnextarg(args, arglen, &argidx);
4016 if (v == NULL)
4017 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004018 if (!PyInt_Check(v)) {
4019 PyErr_SetString(
4020 PyExc_TypeError,
4021 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004022 goto error;
4023 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004024 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004025 if (prec < 0)
4026 prec = 0;
4027 if (--fmtcnt >= 0)
4028 c = *fmt++;
4029 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004030 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004031 prec = c - '0';
4032 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004033 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004034 if (!isdigit(c))
4035 break;
4036 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004037 PyErr_SetString(
4038 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004039 "prec too big");
4040 goto error;
4041 }
4042 prec = prec*10 + (c - '0');
4043 }
4044 }
4045 } /* prec */
4046 if (fmtcnt >= 0) {
4047 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004048 if (--fmtcnt >= 0)
4049 c = *fmt++;
4050 }
4051 }
4052 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004053 PyErr_SetString(PyExc_ValueError,
4054 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004055 goto error;
4056 }
4057 if (c != '%') {
4058 v = getnextarg(args, arglen, &argidx);
4059 if (v == NULL)
4060 goto error;
4061 }
4062 sign = 0;
4063 fill = ' ';
4064 switch (c) {
4065 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004066 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004067 len = 1;
4068 break;
4069 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004070#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004071 if (PyUnicode_Check(v)) {
4072 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004073 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004074 goto unicode;
4075 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004076#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004077 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004078 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004079 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004080 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004081 else
4082 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004083 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004084 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004085 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004086 /* XXX Note: this should never happen,
4087 since PyObject_Repr() and
4088 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004089 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004090 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004091 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004092 goto error;
4093 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004094 pbuf = PyString_AS_STRING(temp);
4095 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004096 if (prec >= 0 && len > prec)
4097 len = prec;
4098 break;
4099 case 'i':
4100 case 'd':
4101 case 'u':
4102 case 'o':
4103 case 'x':
4104 case 'X':
4105 if (c == 'i')
4106 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004107 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004108 temp = _PyString_FormatLong(v, flags,
4109 prec, c, &pbuf, &len);
4110 if (!temp)
4111 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004112 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004113 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004114 else {
4115 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004116 len = formatint(pbuf,
4117 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004118 flags, prec, c, v);
4119 if (len < 0)
4120 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004121 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004122 }
4123 if (flags & F_ZERO)
4124 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004125 break;
4126 case 'e':
4127 case 'E':
4128 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004129 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004130 case 'g':
4131 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004132 if (c == 'F')
4133 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004134 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004135 len = formatfloat(pbuf, sizeof(formatbuf),
4136 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004137 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004138 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004139 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004140 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004141 fill = '0';
4142 break;
4143 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004144#ifdef Py_USING_UNICODE
4145 if (PyUnicode_Check(v)) {
4146 fmt = fmt_start;
4147 argidx = argidx_start;
4148 goto unicode;
4149 }
4150#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004151 pbuf = formatbuf;
4152 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004153 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004154 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004155 break;
4156 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004157 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004158 "unsupported format character '%c' (0x%x) "
4159 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004160 c, c,
4161 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004162 goto error;
4163 }
4164 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004165 if (*pbuf == '-' || *pbuf == '+') {
4166 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004167 len--;
4168 }
4169 else if (flags & F_SIGN)
4170 sign = '+';
4171 else if (flags & F_BLANK)
4172 sign = ' ';
4173 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004174 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004175 }
4176 if (width < len)
4177 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004178 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004179 reslen -= rescnt;
4180 rescnt = width + fmtcnt + 100;
4181 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004182 if (reslen < 0) {
4183 Py_DECREF(result);
4184 return PyErr_NoMemory();
4185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004186 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004187 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004188 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004189 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004190 }
4191 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004192 if (fill != ' ')
4193 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004194 rescnt--;
4195 if (width > len)
4196 width--;
4197 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004198 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4199 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004200 assert(pbuf[1] == c);
4201 if (fill != ' ') {
4202 *res++ = *pbuf++;
4203 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004204 }
Tim Petersfff53252001-04-12 18:38:48 +00004205 rescnt -= 2;
4206 width -= 2;
4207 if (width < 0)
4208 width = 0;
4209 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004210 }
4211 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004212 do {
4213 --rescnt;
4214 *res++ = fill;
4215 } while (--width > len);
4216 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004217 if (fill == ' ') {
4218 if (sign)
4219 *res++ = sign;
4220 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004221 (c == 'x' || c == 'X')) {
4222 assert(pbuf[0] == '0');
4223 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004224 *res++ = *pbuf++;
4225 *res++ = *pbuf++;
4226 }
4227 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004228 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004229 res += len;
4230 rescnt -= len;
4231 while (--width >= len) {
4232 --rescnt;
4233 *res++ = ' ';
4234 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004235 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004236 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004237 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004238 goto error;
4239 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004240 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004241 } /* '%' */
4242 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004243 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004244 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004245 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004246 goto error;
4247 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004248 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004249 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004250 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004251 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004252 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004253
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004254#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004255 unicode:
4256 if (args_owned) {
4257 Py_DECREF(args);
4258 args_owned = 0;
4259 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004260 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004261 if (PyTuple_Check(orig_args) && argidx > 0) {
4262 PyObject *v;
4263 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4264 v = PyTuple_New(n);
4265 if (v == NULL)
4266 goto error;
4267 while (--n >= 0) {
4268 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4269 Py_INCREF(w);
4270 PyTuple_SET_ITEM(v, n, w);
4271 }
4272 args = v;
4273 } else {
4274 Py_INCREF(orig_args);
4275 args = orig_args;
4276 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004277 args_owned = 1;
4278 /* Take what we have of the result and let the Unicode formatting
4279 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004280 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004281 if (_PyString_Resize(&result, rescnt))
4282 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004283 fmtcnt = PyString_GET_SIZE(format) - \
4284 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004285 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4286 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004287 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004288 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004289 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004290 if (v == NULL)
4291 goto error;
4292 /* Paste what we have (result) to what the Unicode formatting
4293 function returned (v) and return the result (or error) */
4294 w = PyUnicode_Concat(result, v);
4295 Py_DECREF(result);
4296 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004297 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004298 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004299#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004300
Guido van Rossume5372401993-03-16 12:15:04 +00004301 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004302 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004303 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004304 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004305 }
Guido van Rossume5372401993-03-16 12:15:04 +00004306 return NULL;
4307}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004308
Guido van Rossum2a61e741997-01-18 07:55:05 +00004309void
Fred Drakeba096332000-07-09 07:04:36 +00004310PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004311{
4312 register PyStringObject *s = (PyStringObject *)(*p);
4313 PyObject *t;
4314 if (s == NULL || !PyString_Check(s))
4315 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004316 /* If it's a string subclass, we don't really know what putting
4317 it in the interned dict might do. */
4318 if (!PyString_CheckExact(s))
4319 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004320 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004321 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004322 if (interned == NULL) {
4323 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004324 if (interned == NULL) {
4325 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004326 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004327 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004328 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004329 t = PyDict_GetItem(interned, (PyObject *)s);
4330 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004331 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004332 Py_DECREF(*p);
4333 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004334 return;
4335 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004336
Armin Rigo79f7ad22004-08-07 19:27:39 +00004337 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004338 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004339 return;
4340 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004341 /* The two references in interned are not counted by refcnt.
4342 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004343 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004344 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004345}
4346
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004347void
4348PyString_InternImmortal(PyObject **p)
4349{
4350 PyString_InternInPlace(p);
4351 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4352 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4353 Py_INCREF(*p);
4354 }
4355}
4356
Guido van Rossum2a61e741997-01-18 07:55:05 +00004357
4358PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004359PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004360{
4361 PyObject *s = PyString_FromString(cp);
4362 if (s == NULL)
4363 return NULL;
4364 PyString_InternInPlace(&s);
4365 return s;
4366}
4367
Guido van Rossum8cf04761997-08-02 02:57:45 +00004368void
Fred Drakeba096332000-07-09 07:04:36 +00004369PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004370{
4371 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004372 for (i = 0; i < UCHAR_MAX + 1; i++) {
4373 Py_XDECREF(characters[i]);
4374 characters[i] = NULL;
4375 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004376 Py_XDECREF(nullstring);
4377 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004378}
Barry Warsawa903ad982001-02-23 16:40:48 +00004379
Barry Warsawa903ad982001-02-23 16:40:48 +00004380void _Py_ReleaseInternedStrings(void)
4381{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004382 PyObject *keys;
4383 PyStringObject *s;
4384 int i, n;
4385
4386 if (interned == NULL || !PyDict_Check(interned))
4387 return;
4388 keys = PyDict_Keys(interned);
4389 if (keys == NULL || !PyList_Check(keys)) {
4390 PyErr_Clear();
4391 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004392 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004393
4394 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4395 detector, interned strings are not forcibly deallocated; rather, we
4396 give them their stolen references back, and then clear and DECREF
4397 the interned dict. */
4398
4399 fprintf(stderr, "releasing interned strings\n");
4400 n = PyList_GET_SIZE(keys);
4401 for (i = 0; i < n; i++) {
4402 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4403 switch (s->ob_sstate) {
4404 case SSTATE_NOT_INTERNED:
4405 /* XXX Shouldn't happen */
4406 break;
4407 case SSTATE_INTERNED_IMMORTAL:
4408 s->ob_refcnt += 1;
4409 break;
4410 case SSTATE_INTERNED_MORTAL:
4411 s->ob_refcnt += 2;
4412 break;
4413 default:
4414 Py_FatalError("Inconsistent interned string state.");
4415 }
4416 s->ob_sstate = SSTATE_NOT_INTERNED;
4417 }
4418 Py_DECREF(keys);
4419 PyDict_Clear(interned);
4420 Py_DECREF(interned);
4421 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004422}