blob: 361d84d942c1dc8bfa926184e0a26eb3285fd03c [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000072 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000074 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000130 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000136 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 int n = 0;
159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
164 memcpy(count, vargs, sizeof(va_list));
165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
179 /* skip the 'l' in %ld, since it doesn't change the
180 width. although only %d is supported (see
181 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000182 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000183 if (*f == 'l' && *(f+1) == 'd')
184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'i': case 'x':
194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
239 int i, longflag = 0;
240 /* parse the width.precision part (we're only
241 interested in the precision value, if any) */
242 n = 0;
243 while (isdigit(Py_CHARMASK(*f)))
244 n = (n*10) + *f++ - '0';
245 if (*f == '.') {
246 f++;
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 }
251 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
252 f++;
253 /* handle the long flag, but only for %ld. others
254 can be added when necessary. */
255 if (*f == 'l' && *(f+1) == 'd') {
256 longflag = 1;
257 ++f;
258 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000259
Barry Warsawdadace02001-08-24 18:32:06 +0000260 switch (*f) {
261 case 'c':
262 *s++ = va_arg(vargs, int);
263 break;
264 case 'd':
265 if (longflag)
266 sprintf(s, "%ld", va_arg(vargs, long));
267 else
268 sprintf(s, "%d", va_arg(vargs, int));
269 s += strlen(s);
270 break;
271 case 'i':
272 sprintf(s, "%i", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'x':
276 sprintf(s, "%x", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 's':
280 p = va_arg(vargs, char*);
281 i = strlen(p);
282 if (n > 0 && i > n)
283 i = n;
284 memcpy(s, p, i);
285 s += i;
286 break;
287 case 'p':
288 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000289 /* %p is ill-defined: ensure leading 0x. */
290 if (s[1] == 'X')
291 s[1] = 'x';
292 else if (s[1] != 'x') {
293 memmove(s+2, s, strlen(s)+1);
294 s[0] = '0';
295 s[1] = 'x';
296 }
Barry Warsawdadace02001-08-24 18:32:06 +0000297 s += strlen(s);
298 break;
299 case '%':
300 *s++ = '%';
301 break;
302 default:
303 strcpy(s, p);
304 s += strlen(s);
305 goto end;
306 }
307 } else
308 *s++ = *f;
309 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000310
Barry Warsawdadace02001-08-24 18:32:06 +0000311 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000312 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000313 return string;
314}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000315
Barry Warsawdadace02001-08-24 18:32:06 +0000316PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000317PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000318{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000319 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000320 va_list vargs;
321
322#ifdef HAVE_STDARG_PROTOTYPES
323 va_start(vargs, format);
324#else
325 va_start(vargs);
326#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000327 ret = PyString_FromFormatV(format, vargs);
328 va_end(vargs);
329 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000330}
331
332
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000333PyObject *PyString_Decode(const char *s,
334 int size,
335 const char *encoding,
336 const char *errors)
337{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000338 PyObject *v, *str;
339
340 str = PyString_FromStringAndSize(s, size);
341 if (str == NULL)
342 return NULL;
343 v = PyString_AsDecodedString(str, encoding, errors);
344 Py_DECREF(str);
345 return v;
346}
347
348PyObject *PyString_AsDecodedObject(PyObject *str,
349 const char *encoding,
350 const char *errors)
351{
352 PyObject *v;
353
354 if (!PyString_Check(str)) {
355 PyErr_BadArgument();
356 goto onError;
357 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000358
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000359 if (encoding == NULL) {
360#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000362#else
363 PyErr_SetString(PyExc_ValueError, "no encoding specified");
364 goto onError;
365#endif
366 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367
368 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 v = PyCodec_Decode(str, encoding, errors);
370 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372
373 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000374
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376 return NULL;
377}
378
379PyObject *PyString_AsDecodedString(PyObject *str,
380 const char *encoding,
381 const char *errors)
382{
383 PyObject *v;
384
385 v = PyString_AsDecodedObject(str, encoding, errors);
386 if (v == NULL)
387 goto onError;
388
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000389#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000390 /* Convert Unicode to a string using the default encoding */
391 if (PyUnicode_Check(v)) {
392 PyObject *temp = v;
393 v = PyUnicode_AsEncodedString(v, NULL, NULL);
394 Py_DECREF(temp);
395 if (v == NULL)
396 goto onError;
397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000398#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 if (!PyString_Check(v)) {
400 PyErr_Format(PyExc_TypeError,
401 "decoder did not return a string object (type=%.400s)",
402 v->ob_type->tp_name);
403 Py_DECREF(v);
404 goto onError;
405 }
406
407 return v;
408
409 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000410 return NULL;
411}
412
413PyObject *PyString_Encode(const char *s,
414 int size,
415 const char *encoding,
416 const char *errors)
417{
418 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000419
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000420 str = PyString_FromStringAndSize(s, size);
421 if (str == NULL)
422 return NULL;
423 v = PyString_AsEncodedString(str, encoding, errors);
424 Py_DECREF(str);
425 return v;
426}
427
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000428PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000429 const char *encoding,
430 const char *errors)
431{
432 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000433
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 if (!PyString_Check(str)) {
435 PyErr_BadArgument();
436 goto onError;
437 }
438
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000439 if (encoding == NULL) {
440#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000442#else
443 PyErr_SetString(PyExc_ValueError, "no encoding specified");
444 goto onError;
445#endif
446 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000447
448 /* Encode via the codec registry */
449 v = PyCodec_Encode(str, encoding, errors);
450 if (v == NULL)
451 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000452
453 return v;
454
455 onError:
456 return NULL;
457}
458
459PyObject *PyString_AsEncodedString(PyObject *str,
460 const char *encoding,
461 const char *errors)
462{
463 PyObject *v;
464
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000465 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000466 if (v == NULL)
467 goto onError;
468
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000469#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 /* Convert Unicode to a string using the default encoding */
471 if (PyUnicode_Check(v)) {
472 PyObject *temp = v;
473 v = PyUnicode_AsEncodedString(v, NULL, NULL);
474 Py_DECREF(temp);
475 if (v == NULL)
476 goto onError;
477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000478#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 if (!PyString_Check(v)) {
480 PyErr_Format(PyExc_TypeError,
481 "encoder did not return a string object (type=%.400s)",
482 v->ob_type->tp_name);
483 Py_DECREF(v);
484 goto onError;
485 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000488
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000489 onError:
490 return NULL;
491}
492
Guido van Rossum234f9421993-06-17 12:35:49 +0000493static void
Fred Drakeba096332000-07-09 07:04:36 +0000494string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000495{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000496 switch (PyString_CHECK_INTERNED(op)) {
497 case SSTATE_NOT_INTERNED:
498 break;
499
500 case SSTATE_INTERNED_MORTAL:
501 /* revive dead object temporarily for DelItem */
502 op->ob_refcnt = 3;
503 if (PyDict_DelItem(interned, op) != 0)
504 Py_FatalError(
505 "deletion of interned string failed");
506 break;
507
508 case SSTATE_INTERNED_IMMORTAL:
509 Py_FatalError("Immortal interned string died.");
510
511 default:
512 Py_FatalError("Inconsistent interned string state.");
513 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000514 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000515}
516
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000517/* Unescape a backslash-escaped string. If unicode is non-zero,
518 the string is a u-literal. If recode_encoding is non-zero,
519 the string is UTF-8 encoded and should be re-encoded in the
520 specified encoding. */
521
522PyObject *PyString_DecodeEscape(const char *s,
523 int len,
524 const char *errors,
525 int unicode,
526 const char *recode_encoding)
527{
528 int c;
529 char *p, *buf;
530 const char *end;
531 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000532 int newlen = recode_encoding ? 4*len:len;
533 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 if (v == NULL)
535 return NULL;
536 p = buf = PyString_AsString(v);
537 end = s + len;
538 while (s < end) {
539 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000540 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000541#ifdef Py_USING_UNICODE
542 if (recode_encoding && (*s & 0x80)) {
543 PyObject *u, *w;
544 char *r;
545 const char* t;
546 int rn;
547 t = s;
548 /* Decode non-ASCII bytes as UTF-8. */
549 while (t < end && (*t & 0x80)) t++;
550 u = PyUnicode_DecodeUTF8(s, t - s, errors);
551 if(!u) goto failed;
552
553 /* Recode them in target encoding. */
554 w = PyUnicode_AsEncodedString(
555 u, recode_encoding, errors);
556 Py_DECREF(u);
557 if (!w) goto failed;
558
559 /* Append bytes to output buffer. */
560 r = PyString_AsString(w);
561 rn = PyString_Size(w);
562 memcpy(p, r, rn);
563 p += rn;
564 Py_DECREF(w);
565 s = t;
566 } else {
567 *p++ = *s++;
568 }
569#else
570 *p++ = *s++;
571#endif
572 continue;
573 }
574 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000575 if (s==end) {
576 PyErr_SetString(PyExc_ValueError,
577 "Trailing \\ in string");
578 goto failed;
579 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000580 switch (*s++) {
581 /* XXX This assumes ASCII! */
582 case '\n': break;
583 case '\\': *p++ = '\\'; break;
584 case '\'': *p++ = '\''; break;
585 case '\"': *p++ = '\"'; break;
586 case 'b': *p++ = '\b'; break;
587 case 'f': *p++ = '\014'; break; /* FF */
588 case 't': *p++ = '\t'; break;
589 case 'n': *p++ = '\n'; break;
590 case 'r': *p++ = '\r'; break;
591 case 'v': *p++ = '\013'; break; /* VT */
592 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
593 case '0': case '1': case '2': case '3':
594 case '4': case '5': case '6': case '7':
595 c = s[-1] - '0';
596 if ('0' <= *s && *s <= '7') {
597 c = (c<<3) + *s++ - '0';
598 if ('0' <= *s && *s <= '7')
599 c = (c<<3) + *s++ - '0';
600 }
601 *p++ = c;
602 break;
603 case 'x':
604 if (isxdigit(Py_CHARMASK(s[0]))
605 && isxdigit(Py_CHARMASK(s[1]))) {
606 unsigned int x = 0;
607 c = Py_CHARMASK(*s);
608 s++;
609 if (isdigit(c))
610 x = c - '0';
611 else if (islower(c))
612 x = 10 + c - 'a';
613 else
614 x = 10 + c - 'A';
615 x = x << 4;
616 c = Py_CHARMASK(*s);
617 s++;
618 if (isdigit(c))
619 x += c - '0';
620 else if (islower(c))
621 x += 10 + c - 'a';
622 else
623 x += 10 + c - 'A';
624 *p++ = x;
625 break;
626 }
627 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 PyErr_SetString(PyExc_ValueError,
629 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000630 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 }
632 if (strcmp(errors, "replace") == 0) {
633 *p++ = '?';
634 } else if (strcmp(errors, "ignore") == 0)
635 /* do nothing */;
636 else {
637 PyErr_Format(PyExc_ValueError,
638 "decoding error; "
639 "unknown error handling code: %.400s",
640 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000641 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000642 }
643#ifndef Py_USING_UNICODE
644 case 'u':
645 case 'U':
646 case 'N':
647 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000648 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000649 "Unicode escapes not legal "
650 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000651 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 }
653#endif
654 default:
655 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000656 s--;
657 goto non_esc; /* an arbitry number of unescaped
658 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000659 }
660 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000661 if (p-buf < newlen)
662 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 return v;
664 failed:
665 Py_DECREF(v);
666 return NULL;
667}
668
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000669static int
670string_getsize(register PyObject *op)
671{
672 char *s;
673 int len;
674 if (PyString_AsStringAndSize(op, &s, &len))
675 return -1;
676 return len;
677}
678
679static /*const*/ char *
680string_getbuffer(register PyObject *op)
681{
682 char *s;
683 int len;
684 if (PyString_AsStringAndSize(op, &s, &len))
685 return NULL;
686 return s;
687}
688
Guido van Rossumd7047b31995-01-02 19:07:15 +0000689int
Fred Drakeba096332000-07-09 07:04:36 +0000690PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000691{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000692 if (!PyString_Check(op))
693 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000694 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695}
696
697/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000698PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (!PyString_Check(op))
701 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000702 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703}
704
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705int
706PyString_AsStringAndSize(register PyObject *obj,
707 register char **s,
708 register int *len)
709{
710 if (s == NULL) {
711 PyErr_BadInternalCall();
712 return -1;
713 }
714
715 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000716#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000717 if (PyUnicode_Check(obj)) {
718 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
719 if (obj == NULL)
720 return -1;
721 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000722 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000723#endif
724 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000725 PyErr_Format(PyExc_TypeError,
726 "expected string or Unicode object, "
727 "%.200s found", obj->ob_type->tp_name);
728 return -1;
729 }
730 }
731
732 *s = PyString_AS_STRING(obj);
733 if (len != NULL)
734 *len = PyString_GET_SIZE(obj);
735 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
736 PyErr_SetString(PyExc_TypeError,
737 "expected string without null bytes");
738 return -1;
739 }
740 return 0;
741}
742
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000743/* Methods */
744
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000745static int
Fred Drakeba096332000-07-09 07:04:36 +0000746string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747{
748 int i;
749 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000750 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000751
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000752 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000753 if (! PyString_CheckExact(op)) {
754 int ret;
755 /* A str subclass may have its own __str__ method. */
756 op = (PyStringObject *) PyObject_Str((PyObject *)op);
757 if (op == NULL)
758 return -1;
759 ret = string_print(op, fp, flags);
760 Py_DECREF(op);
761 return ret;
762 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000763 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000764#ifdef __VMS
765 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
766#else
767 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
768#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000769 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771
Thomas Wouters7e474022000-07-16 12:04:32 +0000772 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000774 if (memchr(op->ob_sval, '\'', op->ob_size) &&
775 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000776 quote = '"';
777
778 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 for (i = 0; i < op->ob_size; i++) {
780 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000781 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\r");
789 else if (c < ' ' || c >= 0x7f)
790 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000791 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000796}
797
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798PyObject *
799PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000801 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000802 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
803 PyObject *v;
804 if (newsize > INT_MAX) {
805 PyErr_SetString(PyExc_OverflowError,
806 "string is too large to make repr");
807 }
808 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000810 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 }
812 else {
813 register int i;
814 register char c;
815 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 int quote;
817
Thomas Wouters7e474022000-07-16 12:04:32 +0000818 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000820 if (smartquotes &&
821 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000822 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '"';
824
Tim Peters9161c8b2001-12-03 01:55:38 +0000825 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000828 /* There's at least enough room for a hex escape
829 and a closing quote. */
830 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000833 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000834 else if (c == '\t')
835 *p++ = '\\', *p++ = 't';
836 else if (c == '\n')
837 *p++ = '\\', *p++ = 'n';
838 else if (c == '\r')
839 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000840 else if (c < ' ' || c >= 0x7f) {
841 /* For performance, we don't want to call
842 PyOS_snprintf here (extra layers of
843 function call). */
844 sprintf(p, "\\x%02x", c & 0xff);
845 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 else
848 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000850 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000853 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000855 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857}
858
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000860string_repr(PyObject *op)
861{
862 return PyString_Repr(op, 1);
863}
864
865static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000866string_str(PyObject *s)
867{
Tim Petersc9933152001-10-16 20:18:24 +0000868 assert(PyString_Check(s));
869 if (PyString_CheckExact(s)) {
870 Py_INCREF(s);
871 return s;
872 }
873 else {
874 /* Subtype -- return genuine string with the same value. */
875 PyStringObject *t = (PyStringObject *) s;
876 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
877 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000878}
879
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880static int
Fred Drakeba096332000-07-09 07:04:36 +0000881string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882{
883 return a->ob_size;
884}
885
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000886static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000887string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888{
889 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 register PyStringObject *op;
891 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000892#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000893 if (PyUnicode_Check(bb))
894 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000895#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000896 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000897 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000898 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 return NULL;
900 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000901#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000903 if ((a->ob_size == 0 || b->ob_size == 0) &&
904 PyString_CheckExact(a) && PyString_CheckExact(b)) {
905 if (a->ob_size == 0) {
906 Py_INCREF(bb);
907 return bb;
908 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000909 Py_INCREF(a);
910 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
912 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000913 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000915 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000916 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000917 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000918 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000919 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000920 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000921 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
922 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
923 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925#undef b
926}
927
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000929string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930{
931 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000932 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000933 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000935 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 if (n < 0)
937 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000938 /* watch out for overflows: the size can overflow int,
939 * and the # of bytes needed can overflow size_t
940 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000941 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000942 if (n && size / n != a->ob_size) {
943 PyErr_SetString(PyExc_OverflowError,
944 "repeated string is too long");
945 return NULL;
946 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000947 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000948 Py_INCREF(a);
949 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950 }
Tim Peters8f422462000-09-09 06:13:41 +0000951 nbytes = size * sizeof(char);
952 if (nbytes / sizeof(char) != (size_t)size ||
953 nbytes + sizeof(PyStringObject) <= nbytes) {
954 PyErr_SetString(PyExc_OverflowError,
955 "repeated string is too long");
956 return NULL;
957 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000959 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000960 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000961 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000962 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000963 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000964 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000965 op->ob_sval[size] = '\0';
966 if (a->ob_size == 1 && n > 0) {
967 memset(op->ob_sval, a->ob_sval[0] , n);
968 return (PyObject *) op;
969 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000970 i = 0;
971 if (i < size) {
972 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
973 i = (int) a->ob_size;
974 }
975 while (i < size) {
976 j = (i <= size-i) ? i : size-i;
977 memcpy(op->ob_sval+i, op->ob_sval, j);
978 i += j;
979 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981}
982
983/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
984
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000986string_slice(register PyStringObject *a, register int i, register int j)
987 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988{
989 if (i < 0)
990 i = 0;
991 if (j < 0)
992 j = 0; /* Avoid signed/unsigned bug in next line */
993 if (j > a->ob_size)
994 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
996 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 Py_INCREF(a);
998 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 }
1000 if (j < i)
1001 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001002 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003}
1004
Guido van Rossum9284a572000-03-07 15:53:43 +00001005static int
Fred Drakeba096332000-07-09 07:04:36 +00001006string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001007{
Barry Warsaw817918c2002-08-06 16:58:21 +00001008 const char *lhs, *rhs, *end;
1009 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001010
1011 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001012#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001013 if (PyUnicode_Check(el))
1014 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001015#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001016 if (!PyString_Check(el)) {
1017 PyErr_SetString(PyExc_TypeError,
1018 "'in <string>' requires string as left operand");
1019 return -1;
1020 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001021 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001022 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001023 rhs = PyString_AS_STRING(el);
1024 lhs = PyString_AS_STRING(a);
1025
1026 /* optimize for a single character */
1027 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001028 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001029
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001030 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001031 while (lhs <= end) {
1032 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001033 return 1;
1034 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001035
Guido van Rossum9284a572000-03-07 15:53:43 +00001036 return 0;
1037}
1038
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001040string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001041{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001043 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001044 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001045 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046 return NULL;
1047 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001048 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001049 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001050 if (v == NULL)
1051 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001052 else {
1053#ifdef COUNT_ALLOCS
1054 one_strings++;
1055#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001056 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001057 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001058 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059}
1060
Martin v. Löwiscd353062001-05-24 16:56:35 +00001061static PyObject*
1062string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001064 int c;
1065 int len_a, len_b;
1066 int min_len;
1067 PyObject *result;
1068
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001069 /* Make sure both arguments are strings. */
1070 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071 result = Py_NotImplemented;
1072 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001073 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 if (a == b) {
1075 switch (op) {
1076 case Py_EQ:case Py_LE:case Py_GE:
1077 result = Py_True;
1078 goto out;
1079 case Py_NE:case Py_LT:case Py_GT:
1080 result = Py_False;
1081 goto out;
1082 }
1083 }
1084 if (op == Py_EQ) {
1085 /* Supporting Py_NE here as well does not save
1086 much time, since Py_NE is rarely used. */
1087 if (a->ob_size == b->ob_size
1088 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001089 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001090 a->ob_size) == 0)) {
1091 result = Py_True;
1092 } else {
1093 result = Py_False;
1094 }
1095 goto out;
1096 }
1097 len_a = a->ob_size; len_b = b->ob_size;
1098 min_len = (len_a < len_b) ? len_a : len_b;
1099 if (min_len > 0) {
1100 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1101 if (c==0)
1102 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1103 }else
1104 c = 0;
1105 if (c == 0)
1106 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1107 switch (op) {
1108 case Py_LT: c = c < 0; break;
1109 case Py_LE: c = c <= 0; break;
1110 case Py_EQ: assert(0); break; /* unreachable */
1111 case Py_NE: c = c != 0; break;
1112 case Py_GT: c = c > 0; break;
1113 case Py_GE: c = c >= 0; break;
1114 default:
1115 result = Py_NotImplemented;
1116 goto out;
1117 }
1118 result = c ? Py_True : Py_False;
1119 out:
1120 Py_INCREF(result);
1121 return result;
1122}
1123
1124int
1125_PyString_Eq(PyObject *o1, PyObject *o2)
1126{
1127 PyStringObject *a, *b;
1128 a = (PyStringObject*)o1;
1129 b = (PyStringObject*)o2;
1130 return a->ob_size == b->ob_size
1131 && *a->ob_sval == *b->ob_sval
1132 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001133}
1134
Guido van Rossum9bfef441993-03-29 10:43:31 +00001135static long
Fred Drakeba096332000-07-09 07:04:36 +00001136string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001137{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001138 register int len;
1139 register unsigned char *p;
1140 register long x;
1141
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 if (a->ob_shash != -1)
1143 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001144 len = a->ob_size;
1145 p = (unsigned char *) a->ob_sval;
1146 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001147 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001148 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001149 x ^= a->ob_size;
1150 if (x == -1)
1151 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001153 return x;
1154}
1155
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001156static PyObject*
1157string_subscript(PyStringObject* self, PyObject* item)
1158{
1159 if (PyInt_Check(item)) {
1160 long i = PyInt_AS_LONG(item);
1161 if (i < 0)
1162 i += PyString_GET_SIZE(self);
1163 return string_item(self,i);
1164 }
1165 else if (PyLong_Check(item)) {
1166 long i = PyLong_AsLong(item);
1167 if (i == -1 && PyErr_Occurred())
1168 return NULL;
1169 if (i < 0)
1170 i += PyString_GET_SIZE(self);
1171 return string_item(self,i);
1172 }
1173 else if (PySlice_Check(item)) {
1174 int start, stop, step, slicelength, cur, i;
1175 char* source_buf;
1176 char* result_buf;
1177 PyObject* result;
1178
1179 if (PySlice_GetIndicesEx((PySliceObject*)item,
1180 PyString_GET_SIZE(self),
1181 &start, &stop, &step, &slicelength) < 0) {
1182 return NULL;
1183 }
1184
1185 if (slicelength <= 0) {
1186 return PyString_FromStringAndSize("", 0);
1187 }
1188 else {
1189 source_buf = PyString_AsString((PyObject*)self);
1190 result_buf = PyMem_Malloc(slicelength);
1191
1192 for (cur = start, i = 0; i < slicelength;
1193 cur += step, i++) {
1194 result_buf[i] = source_buf[cur];
1195 }
1196
1197 result = PyString_FromStringAndSize(result_buf,
1198 slicelength);
1199 PyMem_Free(result_buf);
1200 return result;
1201 }
1202 }
1203 else {
1204 PyErr_SetString(PyExc_TypeError,
1205 "string indices must be integers");
1206 return NULL;
1207 }
1208}
1209
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001210static int
Fred Drakeba096332000-07-09 07:04:36 +00001211string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212{
1213 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001214 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001215 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216 return -1;
1217 }
1218 *ptr = (void *)self->ob_sval;
1219 return self->ob_size;
1220}
1221
1222static int
Fred Drakeba096332000-07-09 07:04:36 +00001223string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001224{
Guido van Rossum045e6881997-09-08 18:30:11 +00001225 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001226 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001227 return -1;
1228}
1229
1230static int
Fred Drakeba096332000-07-09 07:04:36 +00001231string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001232{
1233 if ( lenp )
1234 *lenp = self->ob_size;
1235 return 1;
1236}
1237
Guido van Rossum1db70701998-10-08 02:18:52 +00001238static int
Fred Drakeba096332000-07-09 07:04:36 +00001239string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001240{
1241 if ( index != 0 ) {
1242 PyErr_SetString(PyExc_SystemError,
1243 "accessing non-existent string segment");
1244 return -1;
1245 }
1246 *ptr = self->ob_sval;
1247 return self->ob_size;
1248}
1249
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001250static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001251 (inquiry)string_length, /*sq_length*/
1252 (binaryfunc)string_concat, /*sq_concat*/
1253 (intargfunc)string_repeat, /*sq_repeat*/
1254 (intargfunc)string_item, /*sq_item*/
1255 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001256 0, /*sq_ass_item*/
1257 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001258 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001259};
1260
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001261static PyMappingMethods string_as_mapping = {
1262 (inquiry)string_length,
1263 (binaryfunc)string_subscript,
1264 0,
1265};
1266
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001267static PyBufferProcs string_as_buffer = {
1268 (getreadbufferproc)string_buffer_getreadbuf,
1269 (getwritebufferproc)string_buffer_getwritebuf,
1270 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001271 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001272};
1273
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001274
1275
/* Strip modes; each value is also an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above.  Both the strings and the table itself are
   read-only. */
static const char * const stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Name belonging to strip mode i: its format string with the leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001284
/* Append the substring data[left:right] to the list being built.

   Relies on the enclosing function providing the locals `str` and
   `list` and an `onError` label, which is jumped to on any failure.
   Wrapped in do { } while (0) so the expansion is a single statement:
   write it as `SPLIT_APPEND(...);`. */
#define SPLIT_APPEND(data, left, right)                             \
    do {                                                            \
        str = PyString_FromStringAndSize((data) + (left),           \
                                         (right) - (left));         \
        if (str == NULL)                                            \
            goto onError;                                           \
        if (PyList_Append(list, str)) {                             \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        Py_DECREF(str);                                             \
    } while (0)
1296
/* Insert the substring data[left:right] at the front of the list being
   built.

   Relies on the enclosing function providing the locals `str` and
   `list` and an `onError` label, which is jumped to on any failure.
   Wrapped in do { } while (0) so the expansion is a single statement:
   write it as `SPLIT_INSERT(...);`. */
#define SPLIT_INSERT(data, left, right)                             \
    do {                                                            \
        str = PyString_FromStringAndSize((data) + (left),           \
                                         (right) - (left));         \
        if (str == NULL)                                            \
            goto onError;                                           \
        if (PyList_Insert(list, 0, str)) {                          \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        Py_DECREF(str);                                             \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001308
1309static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001310split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001312 int i, j;
1313 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314 PyObject *list = PyList_New(0);
1315
1316 if (list == NULL)
1317 return NULL;
1318
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 for (i = j = 0; i < len; ) {
1320 while (i < len && isspace(Py_CHARMASK(s[i])))
1321 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001322 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001323 while (i < len && !isspace(Py_CHARMASK(s[i])))
1324 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001326 if (maxsplit-- <= 0)
1327 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001328 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001329 while (i < len && isspace(Py_CHARMASK(s[i])))
1330 i++;
1331 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332 }
1333 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001334 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001335 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001338 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339 Py_DECREF(list);
1340 return NULL;
1341}
1342
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001343static PyObject *
1344split_char(const char *s, int len, char ch, int maxcount)
1345{
1346 register int i, j;
1347 PyObject *str;
1348 PyObject *list = PyList_New(0);
1349
1350 if (list == NULL)
1351 return NULL;
1352
1353 for (i = j = 0; i < len; ) {
1354 if (s[i] == ch) {
1355 if (maxcount-- <= 0)
1356 break;
1357 SPLIT_APPEND(s, j, i);
1358 i = j = i + 1;
1359 } else
1360 i++;
1361 }
1362 if (j <= len) {
1363 SPLIT_APPEND(s, j, len);
1364 }
1365 return list;
1366
1367 onError:
1368 Py_DECREF(list);
1369 return NULL;
1370}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001372PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001373"S.split([sep [,maxsplit]]) -> list of strings\n\
1374\n\
1375Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001376delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001377splits are done. If sep is not specified or is None, any\n\
1378whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379
1380static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001381string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382{
1383 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001384 int maxsplit = -1;
1385 const char *s = PyString_AS_STRING(self), *sub;
1386 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387
Guido van Rossum4c08d552000-03-10 22:55:18 +00001388 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 if (maxsplit < 0)
1391 maxsplit = INT_MAX;
1392 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394 if (PyString_Check(subobj)) {
1395 sub = PyString_AS_STRING(subobj);
1396 n = PyString_GET_SIZE(subobj);
1397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001398#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001399 else if (PyUnicode_Check(subobj))
1400 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001401#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001402 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1403 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001404
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405 if (n == 0) {
1406 PyErr_SetString(PyExc_ValueError, "empty separator");
1407 return NULL;
1408 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409 else if (n == 1)
1410 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411
1412 list = PyList_New(0);
1413 if (list == NULL)
1414 return NULL;
1415
1416 i = j = 0;
1417 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001418 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001419 if (maxsplit-- <= 0)
1420 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1422 if (item == NULL)
1423 goto fail;
1424 err = PyList_Append(list, item);
1425 Py_DECREF(item);
1426 if (err < 0)
1427 goto fail;
1428 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001429 }
1430 else
1431 i++;
1432 }
1433 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1434 if (item == NULL)
1435 goto fail;
1436 err = PyList_Append(list, item);
1437 Py_DECREF(item);
1438 if (err < 0)
1439 goto fail;
1440
1441 return list;
1442
1443 fail:
1444 Py_DECREF(list);
1445 return NULL;
1446}
1447
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001448static PyObject *
1449rsplit_whitespace(const char *s, int len, int maxsplit)
1450{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001451 int i, j;
1452 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001453 PyObject *list = PyList_New(0);
1454
1455 if (list == NULL)
1456 return NULL;
1457
1458 for (i = j = len - 1; i >= 0; ) {
1459 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1460 i--;
1461 j = i;
1462 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1463 i--;
1464 if (j > i) {
1465 if (maxsplit-- <= 0)
1466 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001467 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001468 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1469 i--;
1470 j = i;
1471 }
1472 }
1473 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001475 }
1476 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001478 Py_DECREF(list);
1479 return NULL;
1480}
1481
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001482static PyObject *
1483rsplit_char(const char *s, int len, char ch, int maxcount)
1484{
1485 register int i, j;
1486 PyObject *str;
1487 PyObject *list = PyList_New(0);
1488
1489 if (list == NULL)
1490 return NULL;
1491
1492 for (i = j = len - 1; i >= 0; ) {
1493 if (s[i] == ch) {
1494 if (maxcount-- <= 0)
1495 break;
1496 SPLIT_INSERT(s, i + 1, j + 1);
1497 j = i = i - 1;
1498 } else
1499 i--;
1500 }
1501 if (j >= -1) {
1502 SPLIT_INSERT(s, 0, j + 1);
1503 }
1504 return list;
1505
1506 onError:
1507 Py_DECREF(list);
1508 return NULL;
1509}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001510
1511PyDoc_STRVAR(rsplit__doc__,
1512"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1513\n\
1514Return a list of the words in the string S, using sep as the\n\
1515delimiter string, starting at the end of the string and working\n\
1516to the front. If maxsplit is given, at most maxsplit splits are\n\
1517done. If sep is not specified or is None, any whitespace string\n\
1518is a separator.");
1519
1520static PyObject *
1521string_rsplit(PyStringObject *self, PyObject *args)
1522{
1523 int len = PyString_GET_SIZE(self), n, i, j, err;
1524 int maxsplit = -1;
1525 const char *s = PyString_AS_STRING(self), *sub;
1526 PyObject *list, *item, *subobj = Py_None;
1527
1528 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1529 return NULL;
1530 if (maxsplit < 0)
1531 maxsplit = INT_MAX;
1532 if (subobj == Py_None)
1533 return rsplit_whitespace(s, len, maxsplit);
1534 if (PyString_Check(subobj)) {
1535 sub = PyString_AS_STRING(subobj);
1536 n = PyString_GET_SIZE(subobj);
1537 }
1538#ifdef Py_USING_UNICODE
1539 else if (PyUnicode_Check(subobj))
1540 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1541#endif
1542 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1543 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001544
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001545 if (n == 0) {
1546 PyErr_SetString(PyExc_ValueError, "empty separator");
1547 return NULL;
1548 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001549 else if (n == 1)
1550 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001551
1552 list = PyList_New(0);
1553 if (list == NULL)
1554 return NULL;
1555
1556 j = len;
1557 i = j - n;
1558 while (i >= 0) {
1559 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1560 if (maxsplit-- <= 0)
1561 break;
1562 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1563 if (item == NULL)
1564 goto fail;
1565 err = PyList_Insert(list, 0, item);
1566 Py_DECREF(item);
1567 if (err < 0)
1568 goto fail;
1569 j = i;
1570 i -= n;
1571 }
1572 else
1573 i--;
1574 }
1575 item = PyString_FromStringAndSize(s, j);
1576 if (item == NULL)
1577 goto fail;
1578 err = PyList_Insert(list, 0, item);
1579 Py_DECREF(item);
1580 if (err < 0)
1581 goto fail;
1582
1583 return list;
1584
1585 fail:
1586 Py_DECREF(list);
1587 return NULL;
1588}
1589
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592"S.join(sequence) -> string\n\
1593\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001594Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596
1597static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001598string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599{
1600 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001601 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603 char *p;
1604 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001605 size_t sz = 0;
1606 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001607 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608
Tim Peters19fe14e2001-01-19 03:03:47 +00001609 seq = PySequence_Fast(orig, "");
1610 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001611 if (PyErr_ExceptionMatches(PyExc_TypeError))
1612 PyErr_Format(PyExc_TypeError,
1613 "sequence expected, %.80s found",
1614 orig->ob_type->tp_name);
1615 return NULL;
1616 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001617
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001618 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001619 if (seqlen == 0) {
1620 Py_DECREF(seq);
1621 return PyString_FromString("");
1622 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001624 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001625 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1626 PyErr_Format(PyExc_TypeError,
1627 "sequence item 0: expected string,"
1628 " %.80s found",
1629 item->ob_type->tp_name);
1630 Py_DECREF(seq);
1631 return NULL;
1632 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001633 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001634 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001635 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001637
Tim Peters19fe14e2001-01-19 03:03:47 +00001638 /* There are at least two things to join. Do a pre-pass to figure out
1639 * the total amount of space we'll need (sz), see whether any argument
1640 * is absurd, and defer to the Unicode join if appropriate.
1641 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001642 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001643 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001644 item = PySequence_Fast_GET_ITEM(seq, i);
1645 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001646#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001647 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001648 /* Defer to Unicode join.
1649 * CAUTION: There's no gurantee that the
1650 * original sequence can be iterated over
1651 * again, so we must pass seq here.
1652 */
1653 PyObject *result;
1654 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001655 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001656 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001657 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001658#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001659 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001660 "sequence item %i: expected string,"
1661 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001662 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001663 Py_DECREF(seq);
1664 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001665 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001666 sz += PyString_GET_SIZE(item);
1667 if (i != 0)
1668 sz += seplen;
1669 if (sz < old_sz || sz > INT_MAX) {
1670 PyErr_SetString(PyExc_OverflowError,
1671 "join() is too long for a Python string");
1672 Py_DECREF(seq);
1673 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001675 }
1676
1677 /* Allocate result space. */
1678 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1679 if (res == NULL) {
1680 Py_DECREF(seq);
1681 return NULL;
1682 }
1683
1684 /* Catenate everything. */
1685 p = PyString_AS_STRING(res);
1686 for (i = 0; i < seqlen; ++i) {
1687 size_t n;
1688 item = PySequence_Fast_GET_ITEM(seq, i);
1689 n = PyString_GET_SIZE(item);
1690 memcpy(p, PyString_AS_STRING(item), n);
1691 p += n;
1692 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001693 memcpy(p, sep, seplen);
1694 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001695 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001697
Jeremy Hylton49048292000-07-11 03:28:17 +00001698 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700}
1701
Tim Peters52e155e2001-06-16 05:42:57 +00001702PyObject *
1703_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001704{
Tim Petersa7259592001-06-16 05:11:17 +00001705 assert(sep != NULL && PyString_Check(sep));
1706 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001707 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001708}
1709
/* Normalize a (start, end) pair against a string of length len.
 *
 * end is capped at len; a negative end or start is taken relative to
 * the end of the string and then clamped at 0.  A start larger than
 * len is left untouched.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int hi = *end;
    int lo;

    if (hi > len)
        hi = len;
    else if (hi < 0)
        hi += len;
    if (hi < 0)
        hi = 0;
    *end = hi;

    lo = *start;
    if (lo < 0) {
        lo += len;
        if (lo < 0)
            lo = 0;
    }
    *start = lo;
}
1724
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725static long
Fred Drakeba096332000-07-09 07:04:36 +00001726string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001728 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729 int len = PyString_GET_SIZE(self);
1730 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001733 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001734 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001735 return -2;
1736 if (PyString_Check(subobj)) {
1737 sub = PyString_AS_STRING(subobj);
1738 n = PyString_GET_SIZE(subobj);
1739 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001740#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001741 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001742 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001743#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001744 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745 return -2;
1746
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001747 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748
Guido van Rossum4c08d552000-03-10 22:55:18 +00001749 if (dir > 0) {
1750 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 last -= n;
1753 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001754 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 return (long)i;
1756 }
1757 else {
1758 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001759
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 if (n == 0 && i <= last)
1761 return (long)last;
1762 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001763 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001764 return (long)j;
1765 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001766
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767 return -1;
1768}
1769
1770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772"S.find(sub [,start [,end]]) -> int\n\
1773\n\
1774Return the lowest index in S where substring sub is found,\n\
1775such that sub is contained within s[start,end]. Optional\n\
1776arguments start and end are interpreted as in slice notation.\n\
1777\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001778Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779
1780static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001781string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001783 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784 if (result == -2)
1785 return NULL;
1786 return PyInt_FromLong(result);
1787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791"S.index(sub [,start [,end]]) -> int\n\
1792\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001793Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794
1795static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001796string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001798 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799 if (result == -2)
1800 return NULL;
1801 if (result == -1) {
1802 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001803 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 return NULL;
1805 }
1806 return PyInt_FromLong(result);
1807}
1808
1809
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001810PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811"S.rfind(sub [,start [,end]]) -> int\n\
1812\n\
1813Return the highest index in S where substring sub is found,\n\
1814such that sub is contained within s[start,end]. Optional\n\
1815arguments start and end are interpreted as in slice notation.\n\
1816\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001817Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818
1819static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001820string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001822 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823 if (result == -2)
1824 return NULL;
1825 return PyInt_FromLong(result);
1826}
1827
1828
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001829PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830"S.rindex(sub [,start [,end]]) -> int\n\
1831\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001832Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001833
1834static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001835string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001837 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838 if (result == -2)
1839 return NULL;
1840 if (result == -1) {
1841 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001842 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 return NULL;
1844 }
1845 return PyInt_FromLong(result);
1846}
1847
1848
1849static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001850do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1851{
1852 char *s = PyString_AS_STRING(self);
1853 int len = PyString_GET_SIZE(self);
1854 char *sep = PyString_AS_STRING(sepobj);
1855 int seplen = PyString_GET_SIZE(sepobj);
1856 int i, j;
1857
1858 i = 0;
1859 if (striptype != RIGHTSTRIP) {
1860 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1861 i++;
1862 }
1863 }
1864
1865 j = len;
1866 if (striptype != LEFTSTRIP) {
1867 do {
1868 j--;
1869 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1870 j++;
1871 }
1872
1873 if (i == 0 && j == len && PyString_CheckExact(self)) {
1874 Py_INCREF(self);
1875 return (PyObject*)self;
1876 }
1877 else
1878 return PyString_FromStringAndSize(s+i, j-i);
1879}
1880
1881
1882static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001883do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884{
1885 char *s = PyString_AS_STRING(self);
1886 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888 i = 0;
1889 if (striptype != RIGHTSTRIP) {
1890 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1891 i++;
1892 }
1893 }
1894
1895 j = len;
1896 if (striptype != LEFTSTRIP) {
1897 do {
1898 j--;
1899 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1900 j++;
1901 }
1902
Tim Peters8fa5dd02001-09-12 02:18:30 +00001903 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904 Py_INCREF(self);
1905 return (PyObject*)self;
1906 }
1907 else
1908 return PyString_FromStringAndSize(s+i, j-i);
1909}
1910
1911
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001912static PyObject *
1913do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1914{
1915 PyObject *sep = NULL;
1916
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001917 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001918 return NULL;
1919
1920 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001921 if (PyString_Check(sep))
1922 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001923#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001924 else if (PyUnicode_Check(sep)) {
1925 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1926 PyObject *res;
1927 if (uniself==NULL)
1928 return NULL;
1929 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1930 striptype, sep);
1931 Py_DECREF(uniself);
1932 return res;
1933 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001934#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001935 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001936 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001937#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001938 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001939#else
1940 "%s arg must be None or str",
1941#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001942 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001943 return NULL;
1944 }
1945 return do_xstrip(self, striptype, sep);
1946 }
1947
1948 return do_strip(self, striptype);
1949}
1950
1951
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001952PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001953"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954\n\
1955Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001956whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001957If chars is given and not None, remove characters in chars instead.\n\
1958If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959
1960static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001961string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001963 if (PyTuple_GET_SIZE(args) == 0)
1964 return do_strip(self, BOTHSTRIP); /* Common case */
1965 else
1966 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967}
1968
1969
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001970PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001971"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001973Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001974If chars is given and not None, remove characters in chars instead.\n\
1975If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976
1977static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001978string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001980 if (PyTuple_GET_SIZE(args) == 0)
1981 return do_strip(self, LEFTSTRIP); /* Common case */
1982 else
1983 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984}
1985
1986
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001987PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001988"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001990Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001991If chars is given and not None, remove characters in chars instead.\n\
1992If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993
1994static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001995string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001997 if (PyTuple_GET_SIZE(args) == 0)
1998 return do_strip(self, RIGHTSTRIP); /* Common case */
1999 else
2000 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001}
2002
2003
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002004PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005"S.lower() -> string\n\
2006\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002007Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008
2009static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002010string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011{
2012 char *s = PyString_AS_STRING(self), *s_new;
2013 int i, n = PyString_GET_SIZE(self);
2014 PyObject *new;
2015
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016 new = PyString_FromStringAndSize(NULL, n);
2017 if (new == NULL)
2018 return NULL;
2019 s_new = PyString_AsString(new);
2020 for (i = 0; i < n; i++) {
2021 int c = Py_CHARMASK(*s++);
2022 if (isupper(c)) {
2023 *s_new = tolower(c);
2024 } else
2025 *s_new = c;
2026 s_new++;
2027 }
2028 return new;
2029}
2030
2031
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002032PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033"S.upper() -> string\n\
2034\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002035Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036
2037static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002038string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039{
2040 char *s = PyString_AS_STRING(self), *s_new;
2041 int i, n = PyString_GET_SIZE(self);
2042 PyObject *new;
2043
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 new = PyString_FromStringAndSize(NULL, n);
2045 if (new == NULL)
2046 return NULL;
2047 s_new = PyString_AsString(new);
2048 for (i = 0; i < n; i++) {
2049 int c = Py_CHARMASK(*s++);
2050 if (islower(c)) {
2051 *s_new = toupper(c);
2052 } else
2053 *s_new = c;
2054 s_new++;
2055 }
2056 return new;
2057}
2058
2059
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061"S.title() -> string\n\
2062\n\
2063Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002064characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065
2066static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002067string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002068{
2069 char *s = PyString_AS_STRING(self), *s_new;
2070 int i, n = PyString_GET_SIZE(self);
2071 int previous_is_cased = 0;
2072 PyObject *new;
2073
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074 new = PyString_FromStringAndSize(NULL, n);
2075 if (new == NULL)
2076 return NULL;
2077 s_new = PyString_AsString(new);
2078 for (i = 0; i < n; i++) {
2079 int c = Py_CHARMASK(*s++);
2080 if (islower(c)) {
2081 if (!previous_is_cased)
2082 c = toupper(c);
2083 previous_is_cased = 1;
2084 } else if (isupper(c)) {
2085 if (previous_is_cased)
2086 c = tolower(c);
2087 previous_is_cased = 1;
2088 } else
2089 previous_is_cased = 0;
2090 *s_new++ = c;
2091 }
2092 return new;
2093}
2094
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002095PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096"S.capitalize() -> string\n\
2097\n\
2098Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002099capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100
2101static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002102string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103{
2104 char *s = PyString_AS_STRING(self), *s_new;
2105 int i, n = PyString_GET_SIZE(self);
2106 PyObject *new;
2107
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 new = PyString_FromStringAndSize(NULL, n);
2109 if (new == NULL)
2110 return NULL;
2111 s_new = PyString_AsString(new);
2112 if (0 < n) {
2113 int c = Py_CHARMASK(*s++);
2114 if (islower(c))
2115 *s_new = toupper(c);
2116 else
2117 *s_new = c;
2118 s_new++;
2119 }
2120 for (i = 1; i < n; i++) {
2121 int c = Py_CHARMASK(*s++);
2122 if (isupper(c))
2123 *s_new = tolower(c);
2124 else
2125 *s_new = c;
2126 s_new++;
2127 }
2128 return new;
2129}
2130
2131
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002132PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133"S.count(sub[, start[, end]]) -> int\n\
2134\n\
2135Return the number of occurrences of substring sub in string\n\
2136S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002137interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138
2139static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002140string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143 int len = PyString_GET_SIZE(self), n;
2144 int i = 0, last = INT_MAX;
2145 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002146 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147
Guido van Rossumc6821402000-05-08 14:08:05 +00002148 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2149 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002151
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152 if (PyString_Check(subobj)) {
2153 sub = PyString_AS_STRING(subobj);
2154 n = PyString_GET_SIZE(subobj);
2155 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002156#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002157 else if (PyUnicode_Check(subobj)) {
2158 int count;
2159 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2160 if (count == -1)
2161 return NULL;
2162 else
2163 return PyInt_FromLong((long) count);
2164 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002165#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002166 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2167 return NULL;
2168
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002169 string_adjust_indices(&i, &last, len);
2170
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171 m = last + 1 - n;
2172 if (n == 0)
2173 return PyInt_FromLong((long) (m-i));
2174
2175 r = 0;
2176 while (i < m) {
2177 if (!memcmp(s+i, sub, n)) {
2178 r++;
2179 i += n;
2180 } else {
2181 i++;
2182 }
2183 }
2184 return PyInt_FromLong((long) r);
2185}
2186
2187
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002188PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189"S.swapcase() -> string\n\
2190\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002192converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193
2194static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002195string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196{
2197 char *s = PyString_AS_STRING(self), *s_new;
2198 int i, n = PyString_GET_SIZE(self);
2199 PyObject *new;
2200
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002201 new = PyString_FromStringAndSize(NULL, n);
2202 if (new == NULL)
2203 return NULL;
2204 s_new = PyString_AsString(new);
2205 for (i = 0; i < n; i++) {
2206 int c = Py_CHARMASK(*s++);
2207 if (islower(c)) {
2208 *s_new = toupper(c);
2209 }
2210 else if (isupper(c)) {
2211 *s_new = tolower(c);
2212 }
2213 else
2214 *s_new = c;
2215 s_new++;
2216 }
2217 return new;
2218}
2219
2220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002221PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222"S.translate(table [,deletechars]) -> string\n\
2223\n\
2224Return a copy of the string S, where all characters occurring\n\
2225in the optional argument deletechars are removed, and the\n\
2226remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002227translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228
2229static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002230string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 register char *input, *output;
2233 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234 register int i, c, changed = 0;
2235 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237 int inlen, tablen, dellen = 0;
2238 PyObject *result;
2239 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002240 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002242 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245
2246 if (PyString_Check(tableobj)) {
2247 table1 = PyString_AS_STRING(tableobj);
2248 tablen = PyString_GET_SIZE(tableobj);
2249 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002250#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002251 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002252 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 parameter; instead a mapping to None will cause characters
2254 to be deleted. */
2255 if (delobj != NULL) {
2256 PyErr_SetString(PyExc_TypeError,
2257 "deletions are implemented differently for unicode");
2258 return NULL;
2259 }
2260 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2261 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002262#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265
Martin v. Löwis00b61272002-12-12 20:03:19 +00002266 if (tablen != 256) {
2267 PyErr_SetString(PyExc_ValueError,
2268 "translation table must be 256 characters long");
2269 return NULL;
2270 }
2271
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 if (delobj != NULL) {
2273 if (PyString_Check(delobj)) {
2274 del_table = PyString_AS_STRING(delobj);
2275 dellen = PyString_GET_SIZE(delobj);
2276 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002277#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 else if (PyUnicode_Check(delobj)) {
2279 PyErr_SetString(PyExc_TypeError,
2280 "deletions are implemented differently for unicode");
2281 return NULL;
2282 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002283#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2285 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 }
2287 else {
2288 del_table = NULL;
2289 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290 }
2291
2292 table = table1;
2293 inlen = PyString_Size(input_obj);
2294 result = PyString_FromStringAndSize((char *)NULL, inlen);
2295 if (result == NULL)
2296 return NULL;
2297 output_start = output = PyString_AsString(result);
2298 input = PyString_AsString(input_obj);
2299
2300 if (dellen == 0) {
2301 /* If no deletions are required, use faster code */
2302 for (i = inlen; --i >= 0; ) {
2303 c = Py_CHARMASK(*input++);
2304 if (Py_CHARMASK((*output++ = table[c])) != c)
2305 changed = 1;
2306 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002307 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308 return result;
2309 Py_DECREF(result);
2310 Py_INCREF(input_obj);
2311 return input_obj;
2312 }
2313
2314 for (i = 0; i < 256; i++)
2315 trans_table[i] = Py_CHARMASK(table[i]);
2316
2317 for (i = 0; i < dellen; i++)
2318 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2319
2320 for (i = inlen; --i >= 0; ) {
2321 c = Py_CHARMASK(*input++);
2322 if (trans_table[c] != -1)
2323 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2324 continue;
2325 changed = 1;
2326 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002327 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328 Py_DECREF(result);
2329 Py_INCREF(input_obj);
2330 return input_obj;
2331 }
2332 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002333 if (inlen > 0)
2334 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335 return result;
2336}
2337
2338
2339/* What follows is used for implementing replace(). Perry Stoll. */
2340
/*
  mymemfind

  Substring search over arbitrary blocks of memory (a strstr that
  takes explicit lengths).

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset into MEM of that occurrence, or
  -1 if there is none.  A pattern longer than MEM yields -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start in the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] &&
            memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
2366
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right and resuming just past each match.
  mem=1111 and pat=11 gives 2;
  mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int nfound;

    for (nfound = 0; len >= 0; nfound++) {
        const int offset = mymemfind(mem, len, pat, pat_len);

        if (offset == -1)
            break;
        /* Continue right after the match just found. */
        mem += offset + pat_len;
        len -= offset + pat_len;
    }
    return nfound;
}
2390
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR or there are no
   occurrences of PAT in STR, then the original string is returned.
   The original string is also returned when STR is empty, or when both
   PAT and SUB are empty.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it with PyMem_FREE.

   An empty PAT (with a non-empty SUB) inserts SUB before every byte of
   STR and once more at the end.

   COUNT limits the number of replacements; a negative COUNT means
   "no limit".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Each replacement changes the length by delta.  Only growth can
	   overflow: matches do not overlap, so nfound*pat_len <= len and
	   shrinking never takes the result below zero. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;	/* result too long; reported as an error */

	new_len = len + nfound*delta;
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* empty pattern: emit SUB, then one input byte,
			   until the allotted insertions are used up */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					/* last insertion: flush the rest */
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2488
2489
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002490PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002491"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492\n\
2493Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002494old replaced by new. If the optional argument count is\n\
2495given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002496
2497static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002498string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002499{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002500 const char *str = PyString_AS_STRING(self), *sub, *repl;
2501 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002502 const int len = PyString_GET_SIZE(self);
2503 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002506 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002507
Guido van Rossum4c08d552000-03-10 22:55:18 +00002508 if (!PyArg_ParseTuple(args, "OO|i:replace",
2509 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002510 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002511
2512 if (PyString_Check(subobj)) {
2513 sub = PyString_AS_STRING(subobj);
2514 sub_len = PyString_GET_SIZE(subobj);
2515 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002516#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002518 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002519 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002520#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002521 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2522 return NULL;
2523
2524 if (PyString_Check(replobj)) {
2525 repl = PyString_AS_STRING(replobj);
2526 repl_len = PyString_GET_SIZE(replobj);
2527 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002528#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002530 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002531 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002532#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2534 return NULL;
2535
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537 if (new_s == NULL) {
2538 PyErr_NoMemory();
2539 return NULL;
2540 }
2541 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002542 if (PyString_CheckExact(self)) {
2543 /* we're returning another reference to self */
2544 new = (PyObject*)self;
2545 Py_INCREF(new);
2546 }
2547 else {
2548 new = PyString_FromStringAndSize(str, len);
2549 if (new == NULL)
2550 return NULL;
2551 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552 }
2553 else {
2554 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002555 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002556 }
2557 return new;
2558}
2559
2560
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002561PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002562"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002564Return True if S starts with the specified prefix, False otherwise.\n\
2565With optional start, test S beginning at that position.\n\
2566With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002567
2568static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002569string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002571 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002574 int plen;
2575 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002576 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002577 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002578
Guido van Rossumc6821402000-05-08 14:08:05 +00002579 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2580 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002581 return NULL;
2582 if (PyString_Check(subobj)) {
2583 prefix = PyString_AS_STRING(subobj);
2584 plen = PyString_GET_SIZE(subobj);
2585 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002586#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002587 else if (PyUnicode_Check(subobj)) {
2588 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002589 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002590 subobj, start, end, -1);
2591 if (rc == -1)
2592 return NULL;
2593 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002594 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002595 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002596#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002597 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002598 return NULL;
2599
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002600 string_adjust_indices(&start, &end, len);
2601
2602 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002603 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002604
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002605 if (end-start >= plen)
2606 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2607 else
2608 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002609}
2610
2611
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002612PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002613"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002615Return True if S ends with the specified suffix, False otherwise.\n\
2616With optional start, test S beginning at that position.\n\
2617With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618
2619static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002620string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002621{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002622 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002623 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 const char* suffix;
2625 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002626 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002627 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002629
Guido van Rossumc6821402000-05-08 14:08:05 +00002630 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2631 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002632 return NULL;
2633 if (PyString_Check(subobj)) {
2634 suffix = PyString_AS_STRING(subobj);
2635 slen = PyString_GET_SIZE(subobj);
2636 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002637#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002638 else if (PyUnicode_Check(subobj)) {
2639 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002640 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002641 subobj, start, end, +1);
2642 if (rc == -1)
2643 return NULL;
2644 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002645 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002646 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002647#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002649 return NULL;
2650
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002651 string_adjust_indices(&start, &end, len);
2652
2653 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002654 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002655
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002656 if (end-slen > start)
2657 start = end - slen;
2658 if (end-start >= slen)
2659 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2660 else
2661 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002662}
2663
2664
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002665PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002666"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002667\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002668Encodes S using the codec registered for encoding. encoding defaults\n\
2669to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002670handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002671a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2672'xmlcharrefreplace' as well as any other name registered with\n\
2673codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002674
2675static PyObject *
2676string_encode(PyStringObject *self, PyObject *args)
2677{
2678 char *encoding = NULL;
2679 char *errors = NULL;
2680 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2681 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002682 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2683}
2684
2685
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002686PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002687"S.decode([encoding[,errors]]) -> object\n\
2688\n\
2689Decodes S using the codec registered for encoding. encoding defaults\n\
2690to the default encoding. errors may be given to set a different error\n\
2691handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002692a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2693as well as any other name registerd with codecs.register_error that is\n\
2694able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002695
2696static PyObject *
2697string_decode(PyStringObject *self, PyObject *args)
2698{
2699 char *encoding = NULL;
2700 char *errors = NULL;
2701 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2702 return NULL;
2703 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002704}
2705
2706
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002707PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002708"S.expandtabs([tabsize]) -> string\n\
2709\n\
2710Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002711If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002712
2713static PyObject*
2714string_expandtabs(PyStringObject *self, PyObject *args)
2715{
2716 const char *e, *p;
2717 char *q;
2718 int i, j;
2719 PyObject *u;
2720 int tabsize = 8;
2721
2722 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2723 return NULL;
2724
Thomas Wouters7e474022000-07-16 12:04:32 +00002725 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002726 i = j = 0;
2727 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2728 for (p = PyString_AS_STRING(self); p < e; p++)
2729 if (*p == '\t') {
2730 if (tabsize > 0)
2731 j += tabsize - (j % tabsize);
2732 }
2733 else {
2734 j++;
2735 if (*p == '\n' || *p == '\r') {
2736 i += j;
2737 j = 0;
2738 }
2739 }
2740
2741 /* Second pass: create output string and fill it */
2742 u = PyString_FromStringAndSize(NULL, i + j);
2743 if (!u)
2744 return NULL;
2745
2746 j = 0;
2747 q = PyString_AS_STRING(u);
2748
2749 for (p = PyString_AS_STRING(self); p < e; p++)
2750 if (*p == '\t') {
2751 if (tabsize > 0) {
2752 i = tabsize - (j % tabsize);
2753 j += i;
2754 while (i--)
2755 *q++ = ' ';
2756 }
2757 }
2758 else {
2759 j++;
2760 *q++ = *p;
2761 if (*p == '\n' || *p == '\r')
2762 j = 0;
2763 }
2764
2765 return u;
2766}
2767
Tim Peters8fa5dd02001-09-12 02:18:30 +00002768static PyObject *
2769pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002770{
2771 PyObject *u;
2772
2773 if (left < 0)
2774 left = 0;
2775 if (right < 0)
2776 right = 0;
2777
Tim Peters8fa5dd02001-09-12 02:18:30 +00002778 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002779 Py_INCREF(self);
2780 return (PyObject *)self;
2781 }
2782
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002783 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002784 left + PyString_GET_SIZE(self) + right);
2785 if (u) {
2786 if (left)
2787 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002788 memcpy(PyString_AS_STRING(u) + left,
2789 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002790 PyString_GET_SIZE(self));
2791 if (right)
2792 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2793 fill, right);
2794 }
2795
2796 return u;
2797}
2798
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002799PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002800"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002801"\n"
2802"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002803"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002804
2805static PyObject *
2806string_ljust(PyStringObject *self, PyObject *args)
2807{
2808 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002809 char fillchar = ' ';
2810
2811 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002812 return NULL;
2813
Tim Peters8fa5dd02001-09-12 02:18:30 +00002814 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002815 Py_INCREF(self);
2816 return (PyObject*) self;
2817 }
2818
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002819 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002820}
2821
2822
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002823PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002824"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002825"\n"
2826"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002827"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002828
2829static PyObject *
2830string_rjust(PyStringObject *self, PyObject *args)
2831{
2832 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002833 char fillchar = ' ';
2834
2835 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836 return NULL;
2837
Tim Peters8fa5dd02001-09-12 02:18:30 +00002838 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002839 Py_INCREF(self);
2840 return (PyObject*) self;
2841 }
2842
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002843 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002844}
2845
2846
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002847PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002848"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002849"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002850"Return S centered in a string of length width. Padding is\n"
2851"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002852
2853static PyObject *
2854string_center(PyStringObject *self, PyObject *args)
2855{
2856 int marg, left;
2857 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002858 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002859
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002860 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861 return NULL;
2862
Tim Peters8fa5dd02001-09-12 02:18:30 +00002863 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002864 Py_INCREF(self);
2865 return (PyObject*) self;
2866 }
2867
2868 marg = width - PyString_GET_SIZE(self);
2869 left = marg / 2 + (marg & width & 1);
2870
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002871 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872}
2873
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002874PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002875"S.zfill(width) -> string\n"
2876"\n"
2877"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002878"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002879
2880static PyObject *
2881string_zfill(PyStringObject *self, PyObject *args)
2882{
2883 int fill;
2884 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002885 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002886
2887 int width;
2888 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2889 return NULL;
2890
2891 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002892 if (PyString_CheckExact(self)) {
2893 Py_INCREF(self);
2894 return (PyObject*) self;
2895 }
2896 else
2897 return PyString_FromStringAndSize(
2898 PyString_AS_STRING(self),
2899 PyString_GET_SIZE(self)
2900 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002901 }
2902
2903 fill = width - PyString_GET_SIZE(self);
2904
2905 s = pad(self, fill, 0, '0');
2906
2907 if (s == NULL)
2908 return NULL;
2909
2910 p = PyString_AS_STRING(s);
2911 if (p[fill] == '+' || p[fill] == '-') {
2912 /* move sign to beginning of string */
2913 p[0] = p[fill];
2914 p[fill] = '0';
2915 }
2916
2917 return (PyObject*) s;
2918}
2919
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002920PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002921"S.isspace() -> bool\n\
2922\n\
2923Return True if all characters in S are whitespace\n\
2924and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002925
2926static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002927string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002928{
Fred Drakeba096332000-07-09 07:04:36 +00002929 register const unsigned char *p
2930 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002931 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002932
Guido van Rossum4c08d552000-03-10 22:55:18 +00002933 /* Shortcut for single character strings */
2934 if (PyString_GET_SIZE(self) == 1 &&
2935 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002936 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002937
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002938 /* Special case for empty strings */
2939 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002940 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002941
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942 e = p + PyString_GET_SIZE(self);
2943 for (; p < e; p++) {
2944 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002945 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002946 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002947 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002948}
2949
2950
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002951PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002952"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002953\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002954Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002955and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002956
2957static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002958string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002959{
Fred Drakeba096332000-07-09 07:04:36 +00002960 register const unsigned char *p
2961 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002962 register const unsigned char *e;
2963
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002964 /* Shortcut for single character strings */
2965 if (PyString_GET_SIZE(self) == 1 &&
2966 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002967 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002968
2969 /* Special case for empty strings */
2970 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002971 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002972
2973 e = p + PyString_GET_SIZE(self);
2974 for (; p < e; p++) {
2975 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002976 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002977 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002978 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002979}
2980
2981
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002982PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002983"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002984\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002985Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002986and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002987
2988static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002989string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002990{
Fred Drakeba096332000-07-09 07:04:36 +00002991 register const unsigned char *p
2992 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002993 register const unsigned char *e;
2994
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002995 /* Shortcut for single character strings */
2996 if (PyString_GET_SIZE(self) == 1 &&
2997 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002998 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002999
3000 /* Special case for empty strings */
3001 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003002 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003003
3004 e = p + PyString_GET_SIZE(self);
3005 for (; p < e; p++) {
3006 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003007 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003008 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003009 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003010}
3011
3012
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003013PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003014"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003015\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003016Return True if all characters in S are digits\n\
3017and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003018
3019static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003020string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003021{
Fred Drakeba096332000-07-09 07:04:36 +00003022 register const unsigned char *p
3023 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003024 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003025
Guido van Rossum4c08d552000-03-10 22:55:18 +00003026 /* Shortcut for single character strings */
3027 if (PyString_GET_SIZE(self) == 1 &&
3028 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003029 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003030
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003031 /* Special case for empty strings */
3032 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003033 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003034
Guido van Rossum4c08d552000-03-10 22:55:18 +00003035 e = p + PyString_GET_SIZE(self);
3036 for (; p < e; p++) {
3037 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003038 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003039 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003040 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003041}
3042
3043
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003044PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003045"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003046\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003047Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003048at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003049
3050static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003051string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003052{
Fred Drakeba096332000-07-09 07:04:36 +00003053 register const unsigned char *p
3054 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003055 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003056 int cased;
3057
Guido van Rossum4c08d552000-03-10 22:55:18 +00003058 /* Shortcut for single character strings */
3059 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003060 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003061
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003062 /* Special case for empty strings */
3063 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003064 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003065
Guido van Rossum4c08d552000-03-10 22:55:18 +00003066 e = p + PyString_GET_SIZE(self);
3067 cased = 0;
3068 for (; p < e; p++) {
3069 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003070 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071 else if (!cased && islower(*p))
3072 cased = 1;
3073 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003074 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075}
3076
3077
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003078PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003079"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003081Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003082at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003083
3084static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003085string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086{
Fred Drakeba096332000-07-09 07:04:36 +00003087 register const unsigned char *p
3088 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003089 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 int cased;
3091
Guido van Rossum4c08d552000-03-10 22:55:18 +00003092 /* Shortcut for single character strings */
3093 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003094 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003096 /* Special case for empty strings */
3097 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003098 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003099
Guido van Rossum4c08d552000-03-10 22:55:18 +00003100 e = p + PyString_GET_SIZE(self);
3101 cased = 0;
3102 for (; p < e; p++) {
3103 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003104 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003105 else if (!cased && isupper(*p))
3106 cased = 1;
3107 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003108 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003109}
3110
3111
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003112PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003113"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003115Return True if S is a titlecased string and there is at least one\n\
3116character in S, i.e. uppercase characters may only follow uncased\n\
3117characters and lowercase characters only cased ones. Return False\n\
3118otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003119
3120static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003121string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003122{
Fred Drakeba096332000-07-09 07:04:36 +00003123 register const unsigned char *p
3124 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003125 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003126 int cased, previous_is_cased;
3127
Guido van Rossum4c08d552000-03-10 22:55:18 +00003128 /* Shortcut for single character strings */
3129 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003130 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003131
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003132 /* Special case for empty strings */
3133 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003134 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003135
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136 e = p + PyString_GET_SIZE(self);
3137 cased = 0;
3138 previous_is_cased = 0;
3139 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003140 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141
3142 if (isupper(ch)) {
3143 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003144 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145 previous_is_cased = 1;
3146 cased = 1;
3147 }
3148 else if (islower(ch)) {
3149 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003150 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151 previous_is_cased = 1;
3152 cased = 1;
3153 }
3154 else
3155 previous_is_cased = 0;
3156 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003157 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003158}
3159
3160
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003161PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003162"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163\n\
3164Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003165Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003166is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167
Guido van Rossum4c08d552000-03-10 22:55:18 +00003168static PyObject*
3169string_splitlines(PyStringObject *self, PyObject *args)
3170{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003171 register int i;
3172 register int j;
3173 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003174 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003175 PyObject *list;
3176 PyObject *str;
3177 char *data;
3178
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003179 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003180 return NULL;
3181
3182 data = PyString_AS_STRING(self);
3183 len = PyString_GET_SIZE(self);
3184
Guido van Rossum4c08d552000-03-10 22:55:18 +00003185 list = PyList_New(0);
3186 if (!list)
3187 goto onError;
3188
3189 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003190 int eol;
3191
Guido van Rossum4c08d552000-03-10 22:55:18 +00003192 /* Find a line and append it */
3193 while (i < len && data[i] != '\n' && data[i] != '\r')
3194 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195
3196 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003197 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003198 if (i < len) {
3199 if (data[i] == '\r' && i + 1 < len &&
3200 data[i+1] == '\n')
3201 i += 2;
3202 else
3203 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003204 if (keepends)
3205 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003206 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003207 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 j = i;
3209 }
3210 if (j < len) {
3211 SPLIT_APPEND(data, j, len);
3212 }
3213
3214 return list;
3215
3216 onError:
3217 Py_DECREF(list);
3218 return NULL;
3219}
3220
3221#undef SPLIT_APPEND
3222
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003223static PyObject *
3224string_getnewargs(PyStringObject *v)
3225{
3226 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3227}
3228
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003229
/* Method table for str objects; referenced from the tp_methods slot of
   PyString_Type below.  Each entry gives the Python-visible name, the C
   implementation, the calling-convention flag and the docstring. */
static PyMethodDef
string_methods[] = {
	/* Counterparts of the obsolete stropmodule functions; except
	   string.maketrans(). */
	{"join", (PyCFunction)string_join, METH_O, join__doc__},
	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
	{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
	{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
	{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
	{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
	{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
	{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
	{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
	{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
	{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
	 capitalize__doc__},
	{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
	{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
	 endswith__doc__},
	{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
	{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
	{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
	{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
	{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
	{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
	{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
	{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
	 startswith__doc__},
	{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
	{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
	 swapcase__doc__},
	{"translate", (PyCFunction)string_translate, METH_VARARGS,
	 translate__doc__},
	{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
	{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
	{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
	{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
	{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
	{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
	{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
	{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
	 expandtabs__doc__},
	{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
	 splitlines__doc__},
	/* No docstring given: the omitted last field is zero-initialized */
	{"__getnewargs__",	(PyCFunction)string_getnewargs,	METH_NOARGS},
	{NULL,     NULL}		     /* sentinel */
};
3279
Jeremy Hylton938ace62002-07-17 16:30:39 +00003280static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003281str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3282
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003283static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003284string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003285{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003286 PyObject *x = NULL;
3287 static char *kwlist[] = {"object", 0};
3288
Guido van Rossumae960af2001-08-30 03:11:59 +00003289 if (type != &PyString_Type)
3290 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003291 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3292 return NULL;
3293 if (x == NULL)
3294 return PyString_FromString("");
3295 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003296}
3297
Guido van Rossumae960af2001-08-30 03:11:59 +00003298static PyObject *
3299str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3300{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003301 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003302 int n;
3303
3304 assert(PyType_IsSubtype(type, &PyString_Type));
3305 tmp = string_new(&PyString_Type, args, kwds);
3306 if (tmp == NULL)
3307 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003308 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003309 n = PyString_GET_SIZE(tmp);
3310 pnew = type->tp_alloc(type, n);
3311 if (pnew != NULL) {
3312 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003313 ((PyStringObject *)pnew)->ob_shash =
3314 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003315 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003316 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003317 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003318 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003319}
3320
Guido van Rossumcacfc072002-05-24 19:01:59 +00003321static PyObject *
3322basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3323{
3324 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003325 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003326 return NULL;
3327}
3328
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003329static PyObject *
3330string_mod(PyObject *v, PyObject *w)
3331{
3332 if (!PyString_Check(v)) {
3333 Py_INCREF(Py_NotImplemented);
3334 return Py_NotImplemented;
3335 }
3336 return PyString_Format(v, w);
3337}
3338
/* Docstring for the basestring type (used as tp_doc of PyBaseString_Type) */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003341
/* Number protocol for str: only the remainder slot is filled in, so
   that `%' performs string formatting (see string_mod).  The slots not
   listed here are implicitly zero-initialized. */
static PyNumberMethods string_as_number = {
	0,			/*nb_add*/
	0,			/*nb_subtract*/
	0,			/*nb_multiply*/
	0, 			/*nb_divide*/
	string_mod,		/*nb_remainder*/
};
3349
3350
/* Type object for basestring.  It defines no behaviour of its own: it
   only carries a docstring and a tp_new that always raises TypeError
   (basestring_new).  It is used as tp_base of PyString_Type. */
PyTypeObject PyBaseString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"basestring",				/* tp_name */
	0,					/* tp_basicsize */
	0,					/* tp_itemsize */
	0,			 		/* tp_dealloc */
	0,			 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	0,		 			/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,		 			/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	0,					/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	basestring_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	0,					/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseObject_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	basestring_new,				/* tp_new */
	0,		                	/* tp_free */
};
3393
/* Docstring for the str type (tp_doc of PyString_Type) */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  Instances are variable-sized: a PyStringObject
   header followed by items of sizeof(char).  The type derives from
   PyBaseString_Type and may itself be subclassed (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"str",					/* tp_name */
	sizeof(PyStringObject),			/* tp_basicsize */
	sizeof(char),				/* tp_itemsize */
 	(destructor)string_dealloc, 		/* tp_dealloc */
	(printfunc)string_print, 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)string_repr, 			/* tp_repr */
	&string_as_number,			/* tp_as_number */
	&string_as_sequence,			/* tp_as_sequence */
	&string_as_mapping,			/* tp_as_mapping */
	(hashfunc)string_hash, 			/* tp_hash */
	0,					/* tp_call */
	(reprfunc)string_str,			/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	&string_as_buffer,			/* tp_as_buffer */
	/* CHECKTYPES: number slots such as string_mod check their own
	   operand types (string_mod tests PyString_Check itself) */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
		Py_TPFLAGS_BASETYPE,		/* tp_flags */
	string_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	(richcmpfunc)string_richcompare,	/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	string_methods,				/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseString_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	string_new,				/* tp_new */
	PyObject_Del,	                	/* tp_free */
};
3443
3444void
Fred Drakeba096332000-07-09 07:04:36 +00003445PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003446{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003447 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003448 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003449 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003450 if (w == NULL || !PyString_Check(*pv)) {
3451 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003452 *pv = NULL;
3453 return;
3454 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003455 v = string_concat((PyStringObject *) *pv, w);
3456 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003457 *pv = v;
3458}
3459
Guido van Rossum013142a1994-08-30 08:19:36 +00003460void
Fred Drakeba096332000-07-09 07:04:36 +00003461PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003462{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003463 PyString_Concat(pv, w);
3464 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003465}
3466
3467
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003468/* The following function breaks the notion that strings are immutable:
3469 it changes the size of a string. We get away with this only if there
3470 is only one module referencing the object. You can also think of it
3471 as creating a new string object and destroying the old one, only
3472 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003473 already be known to some other part of the code...
3474 Note that if there's not enough memory to resize the string, the original
3475 string object at *pv is deallocated, *pv is set to NULL, an "out of
3476 memory" exception is set, and -1 is returned. Else (on success) 0 is
3477 returned, and the value in *pv may or may not be the same as on input.
3478 As always, an extra byte is allocated for a trailing \0 byte (newsize
3479 does *not* include that), and a trailing \0 byte is stored.
3480*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003481
3482int
Fred Drakeba096332000-07-09 07:04:36 +00003483_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003484{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003485 register PyObject *v;
3486 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003487 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003488 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003489 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003490 Py_DECREF(v);
3491 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003492 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003493 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003494 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003495 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003496 _Py_ForgetReference(v);
3497 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003498 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003499 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003500 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003501 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003502 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003503 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003504 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003505 _Py_NewReference(*pv);
3506 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003507 sv->ob_size = newsize;
3508 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003509 return 0;
3510}
Guido van Rossume5372401993-03-16 12:15:04 +00003511
3512/* Helpers for formatstring */
3513
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003514static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003515getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003516{
3517 int argidx = *p_argidx;
3518 if (argidx < arglen) {
3519 (*p_argidx)++;
3520 if (arglen < 0)
3521 return args;
3522 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003523 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003524 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003525 PyErr_SetString(PyExc_TypeError,
3526 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003527 return NULL;
3528}
3529
/* Format codes
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 *
 * Each flag occupies its own bit, so several can be combined in the
 * single int `flags' argument of the format helpers below and tested
 * individually (e.g. flags & F_ALT).
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
3542
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003543static int
Fred Drakeba096332000-07-09 07:04:36 +00003544formatfloat(char *buf, size_t buflen, int flags,
3545 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003546{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003547 /* fmt = '%#.' + `prec` + `type`
3548 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003549 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003550 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003551 x = PyFloat_AsDouble(v);
3552 if (x == -1.0 && PyErr_Occurred()) {
3553 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003554 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003555 }
Guido van Rossume5372401993-03-16 12:15:04 +00003556 if (prec < 0)
3557 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003558 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3559 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003560 /* Worst case length calc to ensure no buffer overrun:
3561
3562 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003563 fmt = %#.<prec>g
3564 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003565 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003566 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003567
3568 'f' formats:
3569 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3570 len = 1 + 50 + 1 + prec = 52 + prec
3571
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003572 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003573 always given), therefore increase the length by one.
3574
3575 */
3576 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3577 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003578 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003579 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003580 return -1;
3581 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003582 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3583 (flags&F_ALT) ? "#" : "",
3584 prec, type);
Tim Peters885d4572001-11-28 20:27:42 +00003585 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003586 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003587}
3588
Tim Peters38fd5b62000-09-21 05:43:11 +00003589/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3590 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3591 * Python's regular ints.
3592 * Return value: a new PyString*, or NULL if error.
3593 * . *pbuf is set to point into it,
3594 * *plen set to the # of chars following that.
3595 * Caller must decref it when done using pbuf.
3596 * The string starting at *pbuf is of the form
3597 * "-"? ("0x" | "0X")? digit+
3598 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003599 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003600 * There will be at least prec digits, zero-filled on the left if
3601 * necessary to get that many.
3602 * val object to be converted
3603 * flags bitmask of format flags; only F_ALT is looked at
3604 * prec minimum number of digits; 0-fill on left if needed
3605 * type a character in [duoxX]; u acts the same as d
3606 *
3607 * CAUTION: o, x and X conversions on regular ints can never
3608 * produce a '-' sign, but can for Python's unbounded ints.
3609 */
3610PyObject*
3611_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3612 char **pbuf, int *plen)
3613{
3614 PyObject *result = NULL;
3615 char *buf;
3616 int i;
3617 int sign; /* 1 if '-', else 0 */
3618 int len; /* number of characters */
3619 int numdigits; /* len == numnondigits + numdigits */
3620 int numnondigits = 0;
3621
3622 switch (type) {
3623 case 'd':
3624 case 'u':
3625 result = val->ob_type->tp_str(val);
3626 break;
3627 case 'o':
3628 result = val->ob_type->tp_as_number->nb_oct(val);
3629 break;
3630 case 'x':
3631 case 'X':
3632 numnondigits = 2;
3633 result = val->ob_type->tp_as_number->nb_hex(val);
3634 break;
3635 default:
3636 assert(!"'type' not in [duoxX]");
3637 }
3638 if (!result)
3639 return NULL;
3640
3641 /* To modify the string in-place, there can only be one reference. */
3642 if (result->ob_refcnt != 1) {
3643 PyErr_BadInternalCall();
3644 return NULL;
3645 }
3646 buf = PyString_AsString(result);
3647 len = PyString_Size(result);
3648 if (buf[len-1] == 'L') {
3649 --len;
3650 buf[len] = '\0';
3651 }
3652 sign = buf[0] == '-';
3653 numnondigits += sign;
3654 numdigits = len - numnondigits;
3655 assert(numdigits > 0);
3656
Tim Petersfff53252001-04-12 18:38:48 +00003657 /* Get rid of base marker unless F_ALT */
3658 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003659 /* Need to skip 0x, 0X or 0. */
3660 int skipped = 0;
3661 switch (type) {
3662 case 'o':
3663 assert(buf[sign] == '0');
3664 /* If 0 is only digit, leave it alone. */
3665 if (numdigits > 1) {
3666 skipped = 1;
3667 --numdigits;
3668 }
3669 break;
3670 case 'x':
3671 case 'X':
3672 assert(buf[sign] == '0');
3673 assert(buf[sign + 1] == 'x');
3674 skipped = 2;
3675 numnondigits -= 2;
3676 break;
3677 }
3678 if (skipped) {
3679 buf += skipped;
3680 len -= skipped;
3681 if (sign)
3682 buf[0] = '-';
3683 }
3684 assert(len == numnondigits + numdigits);
3685 assert(numdigits > 0);
3686 }
3687
3688 /* Fill with leading zeroes to meet minimum width. */
3689 if (prec > numdigits) {
3690 PyObject *r1 = PyString_FromStringAndSize(NULL,
3691 numnondigits + prec);
3692 char *b1;
3693 if (!r1) {
3694 Py_DECREF(result);
3695 return NULL;
3696 }
3697 b1 = PyString_AS_STRING(r1);
3698 for (i = 0; i < numnondigits; ++i)
3699 *b1++ = *buf++;
3700 for (i = 0; i < prec - numdigits; i++)
3701 *b1++ = '0';
3702 for (i = 0; i < numdigits; i++)
3703 *b1++ = *buf++;
3704 *b1 = '\0';
3705 Py_DECREF(result);
3706 result = r1;
3707 buf = PyString_AS_STRING(result);
3708 len = numnondigits + prec;
3709 }
3710
3711 /* Fix up case for hex conversions. */
3712 switch (type) {
3713 case 'x':
3714 /* Need to convert all upper case letters to lower case. */
3715 for (i = 0; i < len; i++)
3716 if (buf[i] >= 'A' && buf[i] <= 'F')
3717 buf[i] += 'a'-'A';
3718 break;
3719 case 'X':
3720 /* Need to convert 0x to 0X (and -0x to -0X). */
3721 if (buf[sign + 1] == 'x')
3722 buf[sign + 1] = 'X';
3723 break;
3724 }
3725 *pbuf = buf;
3726 *plen = len;
3727 return result;
3728}
3729
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003730static int
Fred Drakeba096332000-07-09 07:04:36 +00003731formatint(char *buf, size_t buflen, int flags,
3732 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003733{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003734 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003735 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3736 + 1 + 1 = 24 */
3737 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003738 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003739 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003740
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003741 x = PyInt_AsLong(v);
3742 if (x == -1 && PyErr_Occurred()) {
3743 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003744 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003745 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003746 if (x < 0 && type == 'u') {
3747 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003748 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003749 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3750 sign = "-";
3751 else
3752 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003753 if (prec < 0)
3754 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003755
3756 if ((flags & F_ALT) &&
3757 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003758 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003759 * of issues that cause pain:
3760 * - when 0 is being converted, the C standard leaves off
3761 * the '0x' or '0X', which is inconsistent with other
3762 * %#x/%#X conversions and inconsistent with Python's
3763 * hex() function
3764 * - there are platforms that violate the standard and
3765 * convert 0 with the '0x' or '0X'
3766 * (Metrowerks, Compaq Tru64)
3767 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003768 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003769 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003770 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003771 * We can achieve the desired consistency by inserting our
3772 * own '0x' or '0X' prefix, and substituting %x/%X in place
3773 * of %#x/%#X.
3774 *
3775 * Note that this is the same approach as used in
3776 * formatint() in unicodeobject.c
3777 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003778 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3779 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003780 }
3781 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003782 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3783 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003784 prec, type);
3785 }
3786
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003787 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3788 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003789 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003790 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003791 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003792 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003793 return -1;
3794 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003795 if (sign[0])
3796 PyOS_snprintf(buf, buflen, fmt, -x);
3797 else
3798 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003799 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003800}
3801
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003802static int
Fred Drakeba096332000-07-09 07:04:36 +00003803formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003804{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003805 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003806 if (PyString_Check(v)) {
3807 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003808 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003809 }
3810 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003811 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003812 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003813 }
3814 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003815 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003816}
3817
Guido van Rossum013142a1994-08-30 08:19:36 +00003818
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is wrapped in parentheses so that the macro behaves as a
   single primary expression wherever it is used (for example as the
   operand of `sizeof' without extra parentheses).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003828
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003829PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003830PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003831{
3832 char *fmt, *res;
3833 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003834 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003835 PyObject *result, *orig_args;
3836#ifdef Py_USING_UNICODE
3837 PyObject *v, *w;
3838#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003839 PyObject *dict = NULL;
3840 if (format == NULL || !PyString_Check(format) || args == NULL) {
3841 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003842 return NULL;
3843 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003844 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003845 fmt = PyString_AS_STRING(format);
3846 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003847 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003848 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003849 if (result == NULL)
3850 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003851 res = PyString_AsString(result);
3852 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003853 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003854 argidx = 0;
3855 }
3856 else {
3857 arglen = -1;
3858 argidx = -2;
3859 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003860 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3861 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003862 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003863 while (--fmtcnt >= 0) {
3864 if (*fmt != '%') {
3865 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003866 rescnt = fmtcnt + 100;
3867 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003868 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003869 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003870 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003871 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003872 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003873 }
3874 *res++ = *fmt++;
3875 }
3876 else {
3877 /* Got a format specifier */
3878 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003879 int width = -1;
3880 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003881 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003882 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003883 PyObject *v = NULL;
3884 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003885 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003886 int sign;
3887 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003888 char formatbuf[FORMATBUFLEN];
3889 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003890#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003891 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003892 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003893#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003894
Guido van Rossumda9c2711996-12-05 21:58:58 +00003895 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003896 if (*fmt == '(') {
3897 char *keystart;
3898 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003899 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003900 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003901
3902 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003903 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003904 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003905 goto error;
3906 }
3907 ++fmt;
3908 --fmtcnt;
3909 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003910 /* Skip over balanced parentheses */
3911 while (pcount > 0 && --fmtcnt >= 0) {
3912 if (*fmt == ')')
3913 --pcount;
3914 else if (*fmt == '(')
3915 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003916 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003917 }
3918 keylen = fmt - keystart - 1;
3919 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003920 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003921 "incomplete format key");
3922 goto error;
3923 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003924 key = PyString_FromStringAndSize(keystart,
3925 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003926 if (key == NULL)
3927 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003928 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003929 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003930 args_owned = 0;
3931 }
3932 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003933 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003934 if (args == NULL) {
3935 goto error;
3936 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003937 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003938 arglen = -1;
3939 argidx = -2;
3940 }
Guido van Rossume5372401993-03-16 12:15:04 +00003941 while (--fmtcnt >= 0) {
3942 switch (c = *fmt++) {
3943 case '-': flags |= F_LJUST; continue;
3944 case '+': flags |= F_SIGN; continue;
3945 case ' ': flags |= F_BLANK; continue;
3946 case '#': flags |= F_ALT; continue;
3947 case '0': flags |= F_ZERO; continue;
3948 }
3949 break;
3950 }
3951 if (c == '*') {
3952 v = getnextarg(args, arglen, &argidx);
3953 if (v == NULL)
3954 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003955 if (!PyInt_Check(v)) {
3956 PyErr_SetString(PyExc_TypeError,
3957 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003958 goto error;
3959 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003960 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003961 if (width < 0) {
3962 flags |= F_LJUST;
3963 width = -width;
3964 }
Guido van Rossume5372401993-03-16 12:15:04 +00003965 if (--fmtcnt >= 0)
3966 c = *fmt++;
3967 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003968 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003969 width = c - '0';
3970 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003971 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003972 if (!isdigit(c))
3973 break;
3974 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003975 PyErr_SetString(
3976 PyExc_ValueError,
3977 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003978 goto error;
3979 }
3980 width = width*10 + (c - '0');
3981 }
3982 }
3983 if (c == '.') {
3984 prec = 0;
3985 if (--fmtcnt >= 0)
3986 c = *fmt++;
3987 if (c == '*') {
3988 v = getnextarg(args, arglen, &argidx);
3989 if (v == NULL)
3990 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 if (!PyInt_Check(v)) {
3992 PyErr_SetString(
3993 PyExc_TypeError,
3994 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003995 goto error;
3996 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003997 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003998 if (prec < 0)
3999 prec = 0;
4000 if (--fmtcnt >= 0)
4001 c = *fmt++;
4002 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004003 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004004 prec = c - '0';
4005 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004006 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004007 if (!isdigit(c))
4008 break;
4009 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004010 PyErr_SetString(
4011 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004012 "prec too big");
4013 goto error;
4014 }
4015 prec = prec*10 + (c - '0');
4016 }
4017 }
4018 } /* prec */
4019 if (fmtcnt >= 0) {
4020 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004021 if (--fmtcnt >= 0)
4022 c = *fmt++;
4023 }
4024 }
4025 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004026 PyErr_SetString(PyExc_ValueError,
4027 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004028 goto error;
4029 }
4030 if (c != '%') {
4031 v = getnextarg(args, arglen, &argidx);
4032 if (v == NULL)
4033 goto error;
4034 }
4035 sign = 0;
4036 fill = ' ';
4037 switch (c) {
4038 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004039 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004040 len = 1;
4041 break;
4042 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004043#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004044 if (PyUnicode_Check(v)) {
4045 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004046 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004047 goto unicode;
4048 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004049#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004050 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004051 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004052 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004053 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004054 else
4055 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004056 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004057 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004058 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004059 /* XXX Note: this should never happen,
4060 since PyObject_Repr() and
4061 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004062 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004063 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004064 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004065 goto error;
4066 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004067 pbuf = PyString_AS_STRING(temp);
4068 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004069 if (prec >= 0 && len > prec)
4070 len = prec;
4071 break;
4072 case 'i':
4073 case 'd':
4074 case 'u':
4075 case 'o':
4076 case 'x':
4077 case 'X':
4078 if (c == 'i')
4079 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004080 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004081 temp = _PyString_FormatLong(v, flags,
4082 prec, c, &pbuf, &len);
4083 if (!temp)
4084 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004085 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004086 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004087 else {
4088 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004089 len = formatint(pbuf,
4090 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004091 flags, prec, c, v);
4092 if (len < 0)
4093 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004094 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004095 }
4096 if (flags & F_ZERO)
4097 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004098 break;
4099 case 'e':
4100 case 'E':
4101 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004102 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004103 case 'g':
4104 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004105 if (c == 'F')
4106 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004107 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004108 len = formatfloat(pbuf, sizeof(formatbuf),
4109 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004110 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004111 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004112 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004113 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004114 fill = '0';
4115 break;
4116 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004117#ifdef Py_USING_UNICODE
4118 if (PyUnicode_Check(v)) {
4119 fmt = fmt_start;
4120 argidx = argidx_start;
4121 goto unicode;
4122 }
4123#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004124 pbuf = formatbuf;
4125 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004126 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004127 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004128 break;
4129 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004130 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004131 "unsupported format character '%c' (0x%x) "
4132 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004133 c, c,
4134 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004135 goto error;
4136 }
4137 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004138 if (*pbuf == '-' || *pbuf == '+') {
4139 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004140 len--;
4141 }
4142 else if (flags & F_SIGN)
4143 sign = '+';
4144 else if (flags & F_BLANK)
4145 sign = ' ';
4146 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004147 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004148 }
4149 if (width < len)
4150 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004151 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004152 reslen -= rescnt;
4153 rescnt = width + fmtcnt + 100;
4154 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004155 if (reslen < 0) {
4156 Py_DECREF(result);
4157 return PyErr_NoMemory();
4158 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004159 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004160 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004161 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004162 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004163 }
4164 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004165 if (fill != ' ')
4166 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004167 rescnt--;
4168 if (width > len)
4169 width--;
4170 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004171 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4172 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004173 assert(pbuf[1] == c);
4174 if (fill != ' ') {
4175 *res++ = *pbuf++;
4176 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004177 }
Tim Petersfff53252001-04-12 18:38:48 +00004178 rescnt -= 2;
4179 width -= 2;
4180 if (width < 0)
4181 width = 0;
4182 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004183 }
4184 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004185 do {
4186 --rescnt;
4187 *res++ = fill;
4188 } while (--width > len);
4189 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004190 if (fill == ' ') {
4191 if (sign)
4192 *res++ = sign;
4193 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004194 (c == 'x' || c == 'X')) {
4195 assert(pbuf[0] == '0');
4196 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004197 *res++ = *pbuf++;
4198 *res++ = *pbuf++;
4199 }
4200 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004201 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004202 res += len;
4203 rescnt -= len;
4204 while (--width >= len) {
4205 --rescnt;
4206 *res++ = ' ';
4207 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004208 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004209 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004210 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004211 goto error;
4212 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004213 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004214 } /* '%' */
4215 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004216 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004217 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004218 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004219 goto error;
4220 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004221 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004222 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004223 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004224 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004225 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004226
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004227#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004228 unicode:
4229 if (args_owned) {
4230 Py_DECREF(args);
4231 args_owned = 0;
4232 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004233 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004234 if (PyTuple_Check(orig_args) && argidx > 0) {
4235 PyObject *v;
4236 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4237 v = PyTuple_New(n);
4238 if (v == NULL)
4239 goto error;
4240 while (--n >= 0) {
4241 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4242 Py_INCREF(w);
4243 PyTuple_SET_ITEM(v, n, w);
4244 }
4245 args = v;
4246 } else {
4247 Py_INCREF(orig_args);
4248 args = orig_args;
4249 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004250 args_owned = 1;
4251 /* Take what we have of the result and let the Unicode formatting
4252 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004253 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004254 if (_PyString_Resize(&result, rescnt))
4255 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004256 fmtcnt = PyString_GET_SIZE(format) - \
4257 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004258 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4259 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004260 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004261 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004262 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004263 if (v == NULL)
4264 goto error;
4265 /* Paste what we have (result) to what the Unicode formatting
4266 function returned (v) and return the result (or error) */
4267 w = PyUnicode_Concat(result, v);
4268 Py_DECREF(result);
4269 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004270 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004271 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004272#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004273
Guido van Rossume5372401993-03-16 12:15:04 +00004274 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004275 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004276 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004277 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004278 }
Guido van Rossume5372401993-03-16 12:15:04 +00004279 return NULL;
4280}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004281
Guido van Rossum2a61e741997-01-18 07:55:05 +00004282void
Fred Drakeba096332000-07-09 07:04:36 +00004283PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004284{
4285 register PyStringObject *s = (PyStringObject *)(*p);
4286 PyObject *t;
4287 if (s == NULL || !PyString_Check(s))
4288 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004289 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004290 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004291 if (interned == NULL) {
4292 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004293 if (interned == NULL) {
4294 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004295 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004296 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004297 }
4298 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4299 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004300 Py_DECREF(*p);
4301 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004302 return;
4303 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004304 /* Ensure that only true string objects appear in the intern dict */
4305 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004306 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4307 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004308 if (t == NULL) {
4309 PyErr_Clear();
4310 return;
Tim Peters111f6092001-09-12 07:54:51 +00004311 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004312 } else {
4313 t = (PyObject*) s;
4314 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004315 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004316
4317 if (PyDict_SetItem(interned, t, t) == 0) {
4318 /* The two references in interned are not counted by
4319 refcnt. The string deallocator will take care of this */
4320 ((PyObject *)t)->ob_refcnt-=2;
4321 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4322 Py_DECREF(*p);
4323 *p = t;
4324 return;
4325 }
4326 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004327 PyErr_Clear();
4328}
4329
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004330void
4331PyString_InternImmortal(PyObject **p)
4332{
4333 PyString_InternInPlace(p);
4334 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4335 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4336 Py_INCREF(*p);
4337 }
4338}
4339
Guido van Rossum2a61e741997-01-18 07:55:05 +00004340
4341PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004342PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004343{
4344 PyObject *s = PyString_FromString(cp);
4345 if (s == NULL)
4346 return NULL;
4347 PyString_InternInPlace(&s);
4348 return s;
4349}
4350
Guido van Rossum8cf04761997-08-02 02:57:45 +00004351void
Fred Drakeba096332000-07-09 07:04:36 +00004352PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004353{
4354 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004355 for (i = 0; i < UCHAR_MAX + 1; i++) {
4356 Py_XDECREF(characters[i]);
4357 characters[i] = NULL;
4358 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004359 Py_XDECREF(nullstring);
4360 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004361}
Barry Warsawa903ad982001-02-23 16:40:48 +00004362
Barry Warsawa903ad982001-02-23 16:40:48 +00004363void _Py_ReleaseInternedStrings(void)
4364{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004365 PyObject *keys;
4366 PyStringObject *s;
4367 int i, n;
4368
4369 if (interned == NULL || !PyDict_Check(interned))
4370 return;
4371 keys = PyDict_Keys(interned);
4372 if (keys == NULL || !PyList_Check(keys)) {
4373 PyErr_Clear();
4374 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004375 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004376
4377 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4378 detector, interned strings are not forcibly deallocated; rather, we
4379 give them their stolen references back, and then clear and DECREF
4380 the interned dict. */
4381
4382 fprintf(stderr, "releasing interned strings\n");
4383 n = PyList_GET_SIZE(keys);
4384 for (i = 0; i < n; i++) {
4385 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4386 switch (s->ob_sstate) {
4387 case SSTATE_NOT_INTERNED:
4388 /* XXX Shouldn't happen */
4389 break;
4390 case SSTATE_INTERNED_IMMORTAL:
4391 s->ob_refcnt += 1;
4392 break;
4393 case SSTATE_INTERNED_MORTAL:
4394 s->ob_refcnt += 2;
4395 break;
4396 default:
4397 Py_FatalError("Inconsistent interned string state.");
4398 }
4399 s->ob_sstate = SSTATE_NOT_INTERNED;
4400 }
4401 Py_DECREF(keys);
4402 PyDict_Clear(interned);
4403 Py_DECREF(interned);
4404 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004405}