blob: cb0e20b52ba2114e36404eaca43ff7941541f5e0 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
Martin v. Löwisd1327502001-12-02 18:09:41 +000022 PyString_FromStringAndSize() and PyString_FromString() try in certain cases
23 to share string objects. When the size of the string is zero, these
24 routines always return a pointer to the same string object; when the size
25 is one, they return a pointer to an already existing object if the contents
26 of the string is known. For PyString_FromString() this is always the case,
27 for PyString_FromStringAndSize() this is the case when the first argument
28 in not NULL.
29
30 A common practice of allocating a string and then filling it in or changing
31 it must be done carefully. It is only allowed to change the contents of
32 the string if the object was gotten from PyString_FromStringAndSize() with
33 a NULL first argument, because in the future these routines may try to do
34 even more sharing of objects.
35
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000036 The string in the `str' parameter does not have to be null-character
37 terminated. (Therefore it is safe to construct a substring by using
38 `PyString_FromStringAndSize(origstring, substrlen)'.)
39
40 The parameter `size' denotes number of characters to allocate, not
41 counting the null terminating character. If the `str' argument is
42 not NULL, then it points to a of length `size'. For
43 PyString_FromString, this string must be null-terminated.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
45 The member `op->ob_size' denotes the number of bytes of data in the string,
46 not counting the null terminating character, and is therefore equal to the
47 `size' parameter.
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000049PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000050PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000051{
Tim Peters9e897f42001-05-09 07:37:07 +000052 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000053#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 if (size == 0 && (op = nullstring) != NULL) {
55#ifdef COUNT_ALLOCS
56 null_strings++;
57#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 Py_INCREF(op);
59 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000060 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 if (size == 1 && str != NULL &&
62 (op = characters[*str & UCHAR_MAX]) != NULL)
63 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064#ifdef COUNT_ALLOCS
65 one_strings++;
66#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 Py_INCREF(op);
68 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000070#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
72 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000074 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef CACHE_HASH
79 op->ob_shash = -1;
80#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000081#ifdef INTERN_STRINGS
82 op->ob_sinterned = NULL;
83#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000087#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000101#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 if (size > INT_MAX) {
111 PyErr_SetString(PyExc_OverflowError,
112 "string is too long for a Python string");
113 return NULL;
114 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000115#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 if (size == 0 && (op = nullstring) != NULL) {
117#ifdef COUNT_ALLOCS
118 null_strings++;
119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 Py_INCREF(op);
121 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000122 }
123 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
124#ifdef COUNT_ALLOCS
125 one_strings++;
126#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000127 Py_INCREF(op);
128 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000129 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000130#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
132 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138#ifdef CACHE_HASH
139 op->ob_shash = -1;
140#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000141#ifdef INTERN_STRINGS
142 op->ob_sinterned = NULL;
143#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000144 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000145#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000147 PyObject *t = (PyObject *)op;
148 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000149 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000153 PyObject *t = (PyObject *)op;
154 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000155 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000157 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000158 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000160 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000161}
162
Barry Warsawdadace02001-08-24 18:32:06 +0000163PyObject *
164PyString_FromFormatV(const char *format, va_list vargs)
165{
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000167 int n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
171
Tim Petersc15c4f12001-10-02 21:32:07 +0000172#ifdef VA_LIST_IS_ARRAY
173 memcpy(count, vargs, sizeof(va_list));
174#else
175 count = vargs;
176#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
181 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
182 ;
183
184 /* skip the 'l' in %ld, since it doesn't change the
185 width. although only %d is supported (see
186 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000187 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000188 if (*f == 'l' && *(f+1) == 'd')
189 ++f;
190
191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
198 case 'd': case 'i': case 'x':
199 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000214 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
224 what's in the argument list) */
225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
231 expand:
232 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000235 string = PyString_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
238
239 s = PyString_AsString(string);
240
241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
244 int i, longflag = 0;
245 /* parse the width.precision part (we're only
246 interested in the precision value, if any) */
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 if (*f == '.') {
251 f++;
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 }
256 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
257 f++;
258 /* handle the long flag, but only for %ld. others
259 can be added when necessary. */
260 if (*f == 'l' && *(f+1) == 'd') {
261 longflag = 1;
262 ++f;
263 }
264
265 switch (*f) {
266 case 'c':
267 *s++ = va_arg(vargs, int);
268 break;
269 case 'd':
270 if (longflag)
271 sprintf(s, "%ld", va_arg(vargs, long));
272 else
273 sprintf(s, "%d", va_arg(vargs, int));
274 s += strlen(s);
275 break;
276 case 'i':
277 sprintf(s, "%i", va_arg(vargs, int));
278 s += strlen(s);
279 break;
280 case 'x':
281 sprintf(s, "%x", va_arg(vargs, int));
282 s += strlen(s);
283 break;
284 case 's':
285 p = va_arg(vargs, char*);
286 i = strlen(p);
287 if (n > 0 && i > n)
288 i = n;
289 memcpy(s, p, i);
290 s += i;
291 break;
292 case 'p':
293 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000294 /* %p is ill-defined: ensure leading 0x. */
295 if (s[1] == 'X')
296 s[1] = 'x';
297 else if (s[1] != 'x') {
298 memmove(s+2, s, strlen(s)+1);
299 s[0] = '0';
300 s[1] = 'x';
301 }
Barry Warsawdadace02001-08-24 18:32:06 +0000302 s += strlen(s);
303 break;
304 case '%':
305 *s++ = '%';
306 break;
307 default:
308 strcpy(s, p);
309 s += strlen(s);
310 goto end;
311 }
312 } else
313 *s++ = *f;
314 }
315
316 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000317 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000318 return string;
319}
320
321PyObject *
322PyString_FromFormat(const char *format, ...)
323{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000324 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000325 va_list vargs;
326
327#ifdef HAVE_STDARG_PROTOTYPES
328 va_start(vargs, format);
329#else
330 va_start(vargs);
331#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000332 ret = PyString_FromFormatV(format, vargs);
333 va_end(vargs);
334 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000335}
336
337
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000338PyObject *PyString_Decode(const char *s,
339 int size,
340 const char *encoding,
341 const char *errors)
342{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000343 PyObject *v, *str;
344
345 str = PyString_FromStringAndSize(s, size);
346 if (str == NULL)
347 return NULL;
348 v = PyString_AsDecodedString(str, encoding, errors);
349 Py_DECREF(str);
350 return v;
351}
352
353PyObject *PyString_AsDecodedObject(PyObject *str,
354 const char *encoding,
355 const char *errors)
356{
357 PyObject *v;
358
359 if (!PyString_Check(str)) {
360 PyErr_BadArgument();
361 goto onError;
362 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000364 if (encoding == NULL) {
365#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000366 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000367#else
368 PyErr_SetString(PyExc_ValueError, "no encoding specified");
369 goto onError;
370#endif
371 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000372
373 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000374 v = PyCodec_Decode(str, encoding, errors);
375 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000376 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000377
378 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000379
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000380 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000381 return NULL;
382}
383
384PyObject *PyString_AsDecodedString(PyObject *str,
385 const char *encoding,
386 const char *errors)
387{
388 PyObject *v;
389
390 v = PyString_AsDecodedObject(str, encoding, errors);
391 if (v == NULL)
392 goto onError;
393
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000394#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395 /* Convert Unicode to a string using the default encoding */
396 if (PyUnicode_Check(v)) {
397 PyObject *temp = v;
398 v = PyUnicode_AsEncodedString(v, NULL, NULL);
399 Py_DECREF(temp);
400 if (v == NULL)
401 goto onError;
402 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000403#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000404 if (!PyString_Check(v)) {
405 PyErr_Format(PyExc_TypeError,
406 "decoder did not return a string object (type=%.400s)",
407 v->ob_type->tp_name);
408 Py_DECREF(v);
409 goto onError;
410 }
411
412 return v;
413
414 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000415 return NULL;
416}
417
418PyObject *PyString_Encode(const char *s,
419 int size,
420 const char *encoding,
421 const char *errors)
422{
423 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000424
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 str = PyString_FromStringAndSize(s, size);
426 if (str == NULL)
427 return NULL;
428 v = PyString_AsEncodedString(str, encoding, errors);
429 Py_DECREF(str);
430 return v;
431}
432
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000433PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 const char *encoding,
435 const char *errors)
436{
437 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000438
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000439 if (!PyString_Check(str)) {
440 PyErr_BadArgument();
441 goto onError;
442 }
443
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000444 if (encoding == NULL) {
445#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000447#else
448 PyErr_SetString(PyExc_ValueError, "no encoding specified");
449 goto onError;
450#endif
451 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452
453 /* Encode via the codec registry */
454 v = PyCodec_Encode(str, encoding, errors);
455 if (v == NULL)
456 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000457
458 return v;
459
460 onError:
461 return NULL;
462}
463
464PyObject *PyString_AsEncodedString(PyObject *str,
465 const char *encoding,
466 const char *errors)
467{
468 PyObject *v;
469
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000470 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000471 if (v == NULL)
472 goto onError;
473
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000474#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 /* Convert Unicode to a string using the default encoding */
476 if (PyUnicode_Check(v)) {
477 PyObject *temp = v;
478 v = PyUnicode_AsEncodedString(v, NULL, NULL);
479 Py_DECREF(temp);
480 if (v == NULL)
481 goto onError;
482 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000483#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000484 if (!PyString_Check(v)) {
485 PyErr_Format(PyExc_TypeError,
486 "encoder did not return a string object (type=%.400s)",
487 v->ob_type->tp_name);
488 Py_DECREF(v);
489 goto onError;
490 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000491
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000493
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000494 onError:
495 return NULL;
496}
497
Guido van Rossum234f9421993-06-17 12:35:49 +0000498static void
Fred Drakeba096332000-07-09 07:04:36 +0000499string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000500{
Guido van Rossum9475a232001-10-05 20:51:39 +0000501 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000502}
503
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000504static int
505string_getsize(register PyObject *op)
506{
507 char *s;
508 int len;
509 if (PyString_AsStringAndSize(op, &s, &len))
510 return -1;
511 return len;
512}
513
514static /*const*/ char *
515string_getbuffer(register PyObject *op)
516{
517 char *s;
518 int len;
519 if (PyString_AsStringAndSize(op, &s, &len))
520 return NULL;
521 return s;
522}
523
Guido van Rossumd7047b31995-01-02 19:07:15 +0000524int
Fred Drakeba096332000-07-09 07:04:36 +0000525PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000527 if (!PyString_Check(op))
528 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000529 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000530}
531
532/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000533PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000535 if (!PyString_Check(op))
536 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000537 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000538}
539
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000540int
541PyString_AsStringAndSize(register PyObject *obj,
542 register char **s,
543 register int *len)
544{
545 if (s == NULL) {
546 PyErr_BadInternalCall();
547 return -1;
548 }
549
550 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000551#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000552 if (PyUnicode_Check(obj)) {
553 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
554 if (obj == NULL)
555 return -1;
556 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000557 else
558#endif
559 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000560 PyErr_Format(PyExc_TypeError,
561 "expected string or Unicode object, "
562 "%.200s found", obj->ob_type->tp_name);
563 return -1;
564 }
565 }
566
567 *s = PyString_AS_STRING(obj);
568 if (len != NULL)
569 *len = PyString_GET_SIZE(obj);
570 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
571 PyErr_SetString(PyExc_TypeError,
572 "expected string without null bytes");
573 return -1;
574 }
575 return 0;
576}
577
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000578/* Methods */
579
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000580static int
Fred Drakeba096332000-07-09 07:04:36 +0000581string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582{
583 int i;
584 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000585 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000586
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000587 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000588 if (! PyString_CheckExact(op)) {
589 int ret;
590 /* A str subclass may have its own __str__ method. */
591 op = (PyStringObject *) PyObject_Str((PyObject *)op);
592 if (op == NULL)
593 return -1;
594 ret = string_print(op, fp, flags);
595 Py_DECREF(op);
596 return ret;
597 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000598 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000599 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000600 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000601 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000602
Thomas Wouters7e474022000-07-16 12:04:32 +0000603 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000604 quote = '\'';
605 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
606 quote = '"';
607
608 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000609 for (i = 0; i < op->ob_size; i++) {
610 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000611 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000612 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000613 else if (c == '\t')
614 fprintf(fp, "\\t");
615 else if (c == '\n')
616 fprintf(fp, "\\n");
617 else if (c == '\r')
618 fprintf(fp, "\\r");
619 else if (c < ' ' || c >= 0x7f)
620 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000622 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000624 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000625 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000626}
627
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000628static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000629string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000631 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
632 PyObject *v;
633 if (newsize > INT_MAX) {
634 PyErr_SetString(PyExc_OverflowError,
635 "string is too large to make repr");
636 }
637 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000638 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000639 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 }
641 else {
642 register int i;
643 register char c;
644 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000645 int quote;
646
Thomas Wouters7e474022000-07-16 12:04:32 +0000647 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000648 quote = '\'';
649 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
650 quote = '"';
651
Tim Peters9161c8b2001-12-03 01:55:38 +0000652 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000653 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000654 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000655 /* There's at least enough room for a hex escape
656 and a closing quote. */
657 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000658 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000659 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000660 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000661 else if (c == '\t')
662 *p++ = '\\', *p++ = 't';
663 else if (c == '\n')
664 *p++ = '\\', *p++ = 'n';
665 else if (c == '\r')
666 *p++ = '\\', *p++ = 'r';
667 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000668 /* For performance, we don't want to call
669 PyOS_snprintf here (extra layers of
670 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000671 sprintf(p, "\\x%02x", c & 0xff);
672 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000673 }
674 else
675 *p++ = c;
676 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000677 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000678 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000679 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000680 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000681 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000682 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000683 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000684}
685
Guido van Rossum189f1df2001-05-01 16:51:53 +0000686static PyObject *
687string_str(PyObject *s)
688{
Tim Petersc9933152001-10-16 20:18:24 +0000689 assert(PyString_Check(s));
690 if (PyString_CheckExact(s)) {
691 Py_INCREF(s);
692 return s;
693 }
694 else {
695 /* Subtype -- return genuine string with the same value. */
696 PyStringObject *t = (PyStringObject *) s;
697 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
698 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000699}
700
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701static int
Fred Drakeba096332000-07-09 07:04:36 +0000702string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
704 return a->ob_size;
705}
706
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000707static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000708string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709{
710 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000711 register PyStringObject *op;
712 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000713#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000714 if (PyUnicode_Check(bb))
715 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000716#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000717 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000718 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000719 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000720 return NULL;
721 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000724 if ((a->ob_size == 0 || b->ob_size == 0) &&
725 PyString_CheckExact(a) && PyString_CheckExact(b)) {
726 if (a->ob_size == 0) {
727 Py_INCREF(bb);
728 return bb;
729 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 Py_INCREF(a);
731 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732 }
733 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000734 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000736 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000737 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000738 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000739 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000740#ifdef CACHE_HASH
741 op->ob_shash = -1;
742#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000743#ifdef INTERN_STRINGS
744 op->ob_sinterned = NULL;
745#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000746 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
747 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
748 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000749 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000750#undef b
751}
752
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000753static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000754string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000755{
756 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000757 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000758 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000759 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760 if (n < 0)
761 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000762 /* watch out for overflows: the size can overflow int,
763 * and the # of bytes needed can overflow size_t
764 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000766 if (n && size / n != a->ob_size) {
767 PyErr_SetString(PyExc_OverflowError,
768 "repeated string is too long");
769 return NULL;
770 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000771 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000772 Py_INCREF(a);
773 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774 }
Tim Peters8f422462000-09-09 06:13:41 +0000775 nbytes = size * sizeof(char);
776 if (nbytes / sizeof(char) != (size_t)size ||
777 nbytes + sizeof(PyStringObject) <= nbytes) {
778 PyErr_SetString(PyExc_OverflowError,
779 "repeated string is too long");
780 return NULL;
781 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000782 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000783 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000784 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000785 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000786 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000787#ifdef CACHE_HASH
788 op->ob_shash = -1;
789#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000790#ifdef INTERN_STRINGS
791 op->ob_sinterned = NULL;
792#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000793 for (i = 0; i < size; i += a->ob_size)
794 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
795 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000796 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797}
798
799/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
800
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000801static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000802string_slice(register PyStringObject *a, register int i, register int j)
803 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
805 if (i < 0)
806 i = 0;
807 if (j < 0)
808 j = 0; /* Avoid signed/unsigned bug in next line */
809 if (j > a->ob_size)
810 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000811 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
812 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000813 Py_INCREF(a);
814 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 }
816 if (j < i)
817 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000818 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000819}
820
Guido van Rossum9284a572000-03-07 15:53:43 +0000821static int
Fred Drakeba096332000-07-09 07:04:36 +0000822string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000823{
824 register char *s, *end;
825 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000826#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000827 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000828 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000829#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000830 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000831 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000832 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000833 return -1;
834 }
835 c = PyString_AsString(el)[0];
836 s = PyString_AsString(a);
837 end = s + PyString_Size(a);
838 while (s < end) {
839 if (c == *s++)
840 return 1;
841 }
842 return 0;
843}
844
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000845static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000846string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000848 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000849 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000851 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 return NULL;
853 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000854 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000855 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000856 if (v == NULL)
857 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000858 else {
859#ifdef COUNT_ALLOCS
860 one_strings++;
861#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000862 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000863 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000864 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865}
866
Martin v. Löwiscd353062001-05-24 16:56:35 +0000867static PyObject*
868string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000870 int c;
871 int len_a, len_b;
872 int min_len;
873 PyObject *result;
874
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000875 /* Make sure both arguments are strings. */
876 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000877 result = Py_NotImplemented;
878 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000879 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000880 if (a == b) {
881 switch (op) {
882 case Py_EQ:case Py_LE:case Py_GE:
883 result = Py_True;
884 goto out;
885 case Py_NE:case Py_LT:case Py_GT:
886 result = Py_False;
887 goto out;
888 }
889 }
890 if (op == Py_EQ) {
891 /* Supporting Py_NE here as well does not save
892 much time, since Py_NE is rarely used. */
893 if (a->ob_size == b->ob_size
894 && (a->ob_sval[0] == b->ob_sval[0]
895 && memcmp(a->ob_sval, b->ob_sval,
896 a->ob_size) == 0)) {
897 result = Py_True;
898 } else {
899 result = Py_False;
900 }
901 goto out;
902 }
903 len_a = a->ob_size; len_b = b->ob_size;
904 min_len = (len_a < len_b) ? len_a : len_b;
905 if (min_len > 0) {
906 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
907 if (c==0)
908 c = memcmp(a->ob_sval, b->ob_sval, min_len);
909 }else
910 c = 0;
911 if (c == 0)
912 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
913 switch (op) {
914 case Py_LT: c = c < 0; break;
915 case Py_LE: c = c <= 0; break;
916 case Py_EQ: assert(0); break; /* unreachable */
917 case Py_NE: c = c != 0; break;
918 case Py_GT: c = c > 0; break;
919 case Py_GE: c = c >= 0; break;
920 default:
921 result = Py_NotImplemented;
922 goto out;
923 }
924 result = c ? Py_True : Py_False;
925 out:
926 Py_INCREF(result);
927 return result;
928}
929
930int
931_PyString_Eq(PyObject *o1, PyObject *o2)
932{
933 PyStringObject *a, *b;
934 a = (PyStringObject*)o1;
935 b = (PyStringObject*)o2;
936 return a->ob_size == b->ob_size
937 && *a->ob_sval == *b->ob_sval
938 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939}
940
Guido van Rossum9bfef441993-03-29 10:43:31 +0000941static long
Fred Drakeba096332000-07-09 07:04:36 +0000942string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000943{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000944 register int len;
945 register unsigned char *p;
946 register long x;
947
948#ifdef CACHE_HASH
949 if (a->ob_shash != -1)
950 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000951#ifdef INTERN_STRINGS
952 if (a->ob_sinterned != NULL)
953 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000955#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000956#endif
957 len = a->ob_size;
958 p = (unsigned char *) a->ob_sval;
959 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000960 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000961 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000962 x ^= a->ob_size;
963 if (x == -1)
964 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000965#ifdef CACHE_HASH
966 a->ob_shash = x;
967#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000968 return x;
969}
970
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000971static int
Fred Drakeba096332000-07-09 07:04:36 +0000972string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000973{
974 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000975 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000976 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000977 return -1;
978 }
979 *ptr = (void *)self->ob_sval;
980 return self->ob_size;
981}
982
983static int
Fred Drakeba096332000-07-09 07:04:36 +0000984string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000985{
Guido van Rossum045e6881997-09-08 18:30:11 +0000986 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000987 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000988 return -1;
989}
990
991static int
Fred Drakeba096332000-07-09 07:04:36 +0000992string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000993{
994 if ( lenp )
995 *lenp = self->ob_size;
996 return 1;
997}
998
Guido van Rossum1db70701998-10-08 02:18:52 +0000999static int
Fred Drakeba096332000-07-09 07:04:36 +00001000string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001001{
1002 if ( index != 0 ) {
1003 PyErr_SetString(PyExc_SystemError,
1004 "accessing non-existent string segment");
1005 return -1;
1006 }
1007 *ptr = self->ob_sval;
1008 return self->ob_size;
1009}
1010
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001012 (inquiry)string_length, /*sq_length*/
1013 (binaryfunc)string_concat, /*sq_concat*/
1014 (intargfunc)string_repeat, /*sq_repeat*/
1015 (intargfunc)string_item, /*sq_item*/
1016 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001017 0, /*sq_ass_item*/
1018 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001019 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020};
1021
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001022static PyBufferProcs string_as_buffer = {
1023 (getreadbufferproc)string_buffer_getreadbuf,
1024 (getwritebufferproc)string_buffer_getwritebuf,
1025 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001026 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001027};
1028
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001029
1030
/* Selector values for the `striptype` argument of do_strip(). */
#define LEFTSTRIP   0   /* strip leading whitespace only */
#define RIGHTSTRIP  1   /* strip trailing whitespace only */
#define BOTHSTRIP   2   /* strip both ends */
1034
1035
1036static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001037split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001038{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001039 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001040 PyObject* item;
1041 PyObject *list = PyList_New(0);
1042
1043 if (list == NULL)
1044 return NULL;
1045
Guido van Rossum4c08d552000-03-10 22:55:18 +00001046 for (i = j = 0; i < len; ) {
1047 while (i < len && isspace(Py_CHARMASK(s[i])))
1048 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001050 while (i < len && !isspace(Py_CHARMASK(s[i])))
1051 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 if (maxsplit-- <= 0)
1054 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1056 if (item == NULL)
1057 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001058 err = PyList_Append(list, item);
1059 Py_DECREF(item);
1060 if (err < 0)
1061 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001062 while (i < len && isspace(Py_CHARMASK(s[i])))
1063 i++;
1064 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065 }
1066 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001067 if (j < len) {
1068 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1069 if (item == NULL)
1070 goto finally;
1071 err = PyList_Append(list, item);
1072 Py_DECREF(item);
1073 if (err < 0)
1074 goto finally;
1075 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001076 return list;
1077 finally:
1078 Py_DECREF(list);
1079 return NULL;
1080}
1081
1082
1083static char split__doc__[] =
1084"S.split([sep [,maxsplit]]) -> list of strings\n\
1085\n\
1086Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087delimiter string. If maxsplit is given, at most maxsplit\n\
1088splits are done. If sep is not specified, any whitespace string\n\
1089is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090
1091static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001092string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093{
1094 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001095 int maxsplit = -1;
1096 const char *s = PyString_AS_STRING(self), *sub;
1097 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001098
Guido van Rossum4c08d552000-03-10 22:55:18 +00001099 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001100 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001101 if (maxsplit < 0)
1102 maxsplit = INT_MAX;
1103 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001104 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001105 if (PyString_Check(subobj)) {
1106 sub = PyString_AS_STRING(subobj);
1107 n = PyString_GET_SIZE(subobj);
1108 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001109#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001110 else if (PyUnicode_Check(subobj))
1111 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001112#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001113 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1114 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001115 if (n == 0) {
1116 PyErr_SetString(PyExc_ValueError, "empty separator");
1117 return NULL;
1118 }
1119
1120 list = PyList_New(0);
1121 if (list == NULL)
1122 return NULL;
1123
1124 i = j = 0;
1125 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001126 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001127 if (maxsplit-- <= 0)
1128 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001129 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1130 if (item == NULL)
1131 goto fail;
1132 err = PyList_Append(list, item);
1133 Py_DECREF(item);
1134 if (err < 0)
1135 goto fail;
1136 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001137 }
1138 else
1139 i++;
1140 }
1141 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1142 if (item == NULL)
1143 goto fail;
1144 err = PyList_Append(list, item);
1145 Py_DECREF(item);
1146 if (err < 0)
1147 goto fail;
1148
1149 return list;
1150
1151 fail:
1152 Py_DECREF(list);
1153 return NULL;
1154}
1155
1156
1157static char join__doc__[] =
1158"S.join(sequence) -> string\n\
1159\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001160Return a string which is the concatenation of the strings in the\n\
1161sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001162
1163static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001164string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165{
1166 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001167 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001168 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001169 char *p;
1170 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001171 size_t sz = 0;
1172 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001173 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001174
Tim Peters19fe14e2001-01-19 03:03:47 +00001175 seq = PySequence_Fast(orig, "");
1176 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001177 if (PyErr_ExceptionMatches(PyExc_TypeError))
1178 PyErr_Format(PyExc_TypeError,
1179 "sequence expected, %.80s found",
1180 orig->ob_type->tp_name);
1181 return NULL;
1182 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001183
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001184 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001185 if (seqlen == 0) {
1186 Py_DECREF(seq);
1187 return PyString_FromString("");
1188 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001189 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001190 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001191 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1192 PyErr_Format(PyExc_TypeError,
1193 "sequence item 0: expected string,"
1194 " %.80s found",
1195 item->ob_type->tp_name);
1196 Py_DECREF(seq);
1197 return NULL;
1198 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001199 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001200 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001201 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001203
Tim Peters19fe14e2001-01-19 03:03:47 +00001204 /* There are at least two things to join. Do a pre-pass to figure out
1205 * the total amount of space we'll need (sz), see whether any argument
1206 * is absurd, and defer to the Unicode join if appropriate.
1207 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001208 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001209 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001210 item = PySequence_Fast_GET_ITEM(seq, i);
1211 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001212#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001213 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001214 /* Defer to Unicode join.
1215 * CAUTION: There's no gurantee that the
1216 * original sequence can be iterated over
1217 * again, so we must pass seq here.
1218 */
1219 PyObject *result;
1220 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001221 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001222 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001223 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001224#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001225 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001226 "sequence item %i: expected string,"
1227 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001228 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001229 Py_DECREF(seq);
1230 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001231 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001232 sz += PyString_GET_SIZE(item);
1233 if (i != 0)
1234 sz += seplen;
1235 if (sz < old_sz || sz > INT_MAX) {
1236 PyErr_SetString(PyExc_OverflowError,
1237 "join() is too long for a Python string");
1238 Py_DECREF(seq);
1239 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001240 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001241 }
1242
1243 /* Allocate result space. */
1244 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1245 if (res == NULL) {
1246 Py_DECREF(seq);
1247 return NULL;
1248 }
1249
1250 /* Catenate everything. */
1251 p = PyString_AS_STRING(res);
1252 for (i = 0; i < seqlen; ++i) {
1253 size_t n;
1254 item = PySequence_Fast_GET_ITEM(seq, i);
1255 n = PyString_GET_SIZE(item);
1256 memcpy(p, PyString_AS_STRING(item), n);
1257 p += n;
1258 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001259 memcpy(p, sep, seplen);
1260 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001261 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001262 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001263
Jeremy Hylton49048292000-07-11 03:28:17 +00001264 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001266}
1267
Tim Peters52e155e2001-06-16 05:42:57 +00001268PyObject *
1269_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001270{
Tim Petersa7259592001-06-16 05:11:17 +00001271 assert(sep != NULL && PyString_Check(sep));
1272 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001273 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001274}
1275
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001276static long
Fred Drakeba096332000-07-09 07:04:36 +00001277string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001279 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280 int len = PyString_GET_SIZE(self);
1281 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001282 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001283
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001284 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001285 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 return -2;
1287 if (PyString_Check(subobj)) {
1288 sub = PyString_AS_STRING(subobj);
1289 n = PyString_GET_SIZE(subobj);
1290 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001291#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 else if (PyUnicode_Check(subobj))
1293 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001294#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001295 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296 return -2;
1297
1298 if (last > len)
1299 last = len;
1300 if (last < 0)
1301 last += len;
1302 if (last < 0)
1303 last = 0;
1304 if (i < 0)
1305 i += len;
1306 if (i < 0)
1307 i = 0;
1308
Guido van Rossum4c08d552000-03-10 22:55:18 +00001309 if (dir > 0) {
1310 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312 last -= n;
1313 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001314 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 return (long)i;
1316 }
1317 else {
1318 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001319
Guido van Rossum4c08d552000-03-10 22:55:18 +00001320 if (n == 0 && i <= last)
1321 return (long)last;
1322 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001323 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001324 return (long)j;
1325 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001326
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327 return -1;
1328}
1329
1330
1331static char find__doc__[] =
1332"S.find(sub [,start [,end]]) -> int\n\
1333\n\
1334Return the lowest index in S where substring sub is found,\n\
1335such that sub is contained within s[start,end]. Optional\n\
1336arguments start and end are interpreted as in slice notation.\n\
1337\n\
1338Return -1 on failure.";
1339
1340static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001341string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001343 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344 if (result == -2)
1345 return NULL;
1346 return PyInt_FromLong(result);
1347}
1348
1349
1350static char index__doc__[] =
1351"S.index(sub [,start [,end]]) -> int\n\
1352\n\
1353Like S.find() but raise ValueError when the substring is not found.";
1354
1355static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001356string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001359 if (result == -2)
1360 return NULL;
1361 if (result == -1) {
1362 PyErr_SetString(PyExc_ValueError,
1363 "substring not found in string.index");
1364 return NULL;
1365 }
1366 return PyInt_FromLong(result);
1367}
1368
1369
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370static char rfind__doc__[] =
1371"S.rfind(sub [,start [,end]]) -> int\n\
1372\n\
1373Return the highest index in S where substring sub is found,\n\
1374such that sub is contained within s[start,end]. Optional\n\
1375arguments start and end are interpreted as in slice notation.\n\
1376\n\
1377Return -1 on failure.";
1378
1379static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001380string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001381{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001382 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383 if (result == -2)
1384 return NULL;
1385 return PyInt_FromLong(result);
1386}
1387
1388
1389static char rindex__doc__[] =
1390"S.rindex(sub [,start [,end]]) -> int\n\
1391\n\
1392Like S.rfind() but raise ValueError when the substring is not found.";
1393
1394static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001395string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001397 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398 if (result == -2)
1399 return NULL;
1400 if (result == -1) {
1401 PyErr_SetString(PyExc_ValueError,
1402 "substring not found in string.rindex");
1403 return NULL;
1404 }
1405 return PyInt_FromLong(result);
1406}
1407
1408
1409static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001410do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411{
1412 char *s = PyString_AS_STRING(self);
1413 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 i = 0;
1416 if (striptype != RIGHTSTRIP) {
1417 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1418 i++;
1419 }
1420 }
1421
1422 j = len;
1423 if (striptype != LEFTSTRIP) {
1424 do {
1425 j--;
1426 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1427 j++;
1428 }
1429
Tim Peters8fa5dd02001-09-12 02:18:30 +00001430 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431 Py_INCREF(self);
1432 return (PyObject*)self;
1433 }
1434 else
1435 return PyString_FromStringAndSize(s+i, j-i);
1436}
1437
1438
1439static char strip__doc__[] =
1440"S.strip() -> string\n\
1441\n\
1442Return a copy of the string S with leading and trailing\n\
1443whitespace removed.";
1444
1445static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001446string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001448 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449}
1450
1451
1452static char lstrip__doc__[] =
1453"S.lstrip() -> string\n\
1454\n\
1455Return a copy of the string S with leading whitespace removed.";
1456
1457static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001458string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001460 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461}
1462
1463
1464static char rstrip__doc__[] =
1465"S.rstrip() -> string\n\
1466\n\
1467Return a copy of the string S with trailing whitespace removed.";
1468
1469static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001470string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001472 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473}
1474
1475
1476static char lower__doc__[] =
1477"S.lower() -> string\n\
1478\n\
1479Return a copy of the string S converted to lowercase.";
1480
1481static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001482string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001483{
1484 char *s = PyString_AS_STRING(self), *s_new;
1485 int i, n = PyString_GET_SIZE(self);
1486 PyObject *new;
1487
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 new = PyString_FromStringAndSize(NULL, n);
1489 if (new == NULL)
1490 return NULL;
1491 s_new = PyString_AsString(new);
1492 for (i = 0; i < n; i++) {
1493 int c = Py_CHARMASK(*s++);
1494 if (isupper(c)) {
1495 *s_new = tolower(c);
1496 } else
1497 *s_new = c;
1498 s_new++;
1499 }
1500 return new;
1501}
1502
1503
1504static char upper__doc__[] =
1505"S.upper() -> string\n\
1506\n\
1507Return a copy of the string S converted to uppercase.";
1508
1509static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001510string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511{
1512 char *s = PyString_AS_STRING(self), *s_new;
1513 int i, n = PyString_GET_SIZE(self);
1514 PyObject *new;
1515
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 new = PyString_FromStringAndSize(NULL, n);
1517 if (new == NULL)
1518 return NULL;
1519 s_new = PyString_AsString(new);
1520 for (i = 0; i < n; i++) {
1521 int c = Py_CHARMASK(*s++);
1522 if (islower(c)) {
1523 *s_new = toupper(c);
1524 } else
1525 *s_new = c;
1526 s_new++;
1527 }
1528 return new;
1529}
1530
1531
Guido van Rossum4c08d552000-03-10 22:55:18 +00001532static char title__doc__[] =
1533"S.title() -> string\n\
1534\n\
1535Return a titlecased version of S, i.e. words start with uppercase\n\
1536characters, all remaining cased characters have lowercase.";
1537
1538static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001539string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001540{
1541 char *s = PyString_AS_STRING(self), *s_new;
1542 int i, n = PyString_GET_SIZE(self);
1543 int previous_is_cased = 0;
1544 PyObject *new;
1545
Guido van Rossum4c08d552000-03-10 22:55:18 +00001546 new = PyString_FromStringAndSize(NULL, n);
1547 if (new == NULL)
1548 return NULL;
1549 s_new = PyString_AsString(new);
1550 for (i = 0; i < n; i++) {
1551 int c = Py_CHARMASK(*s++);
1552 if (islower(c)) {
1553 if (!previous_is_cased)
1554 c = toupper(c);
1555 previous_is_cased = 1;
1556 } else if (isupper(c)) {
1557 if (previous_is_cased)
1558 c = tolower(c);
1559 previous_is_cased = 1;
1560 } else
1561 previous_is_cased = 0;
1562 *s_new++ = c;
1563 }
1564 return new;
1565}
1566
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567static char capitalize__doc__[] =
1568"S.capitalize() -> string\n\
1569\n\
1570Return a copy of the string S with only its first character\n\
1571capitalized.";
1572
1573static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001574string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575{
1576 char *s = PyString_AS_STRING(self), *s_new;
1577 int i, n = PyString_GET_SIZE(self);
1578 PyObject *new;
1579
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001580 new = PyString_FromStringAndSize(NULL, n);
1581 if (new == NULL)
1582 return NULL;
1583 s_new = PyString_AsString(new);
1584 if (0 < n) {
1585 int c = Py_CHARMASK(*s++);
1586 if (islower(c))
1587 *s_new = toupper(c);
1588 else
1589 *s_new = c;
1590 s_new++;
1591 }
1592 for (i = 1; i < n; i++) {
1593 int c = Py_CHARMASK(*s++);
1594 if (isupper(c))
1595 *s_new = tolower(c);
1596 else
1597 *s_new = c;
1598 s_new++;
1599 }
1600 return new;
1601}
1602
1603
/* Docstring for S.count(). */
static char count__doc__[] =
"S.count(sub[, start[, end]]) -> int\n"
"\n"
"Return the number of occurrences of substring sub in string\n"
"S[start:end]. Optional arguments start and end are\n"
"interpreted as in slice notation.";
1610
1611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001612string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 int len = PyString_GET_SIZE(self), n;
1616 int i = 0, last = INT_MAX;
1617 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001618 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619
Guido van Rossumc6821402000-05-08 14:08:05 +00001620 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1621 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001623
Guido van Rossum4c08d552000-03-10 22:55:18 +00001624 if (PyString_Check(subobj)) {
1625 sub = PyString_AS_STRING(subobj);
1626 n = PyString_GET_SIZE(subobj);
1627 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001628#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001629 else if (PyUnicode_Check(subobj)) {
1630 int count;
1631 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1632 if (count == -1)
1633 return NULL;
1634 else
1635 return PyInt_FromLong((long) count);
1636 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001637#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001638 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1639 return NULL;
1640
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 if (last > len)
1642 last = len;
1643 if (last < 0)
1644 last += len;
1645 if (last < 0)
1646 last = 0;
1647 if (i < 0)
1648 i += len;
1649 if (i < 0)
1650 i = 0;
1651 m = last + 1 - n;
1652 if (n == 0)
1653 return PyInt_FromLong((long) (m-i));
1654
1655 r = 0;
1656 while (i < m) {
1657 if (!memcmp(s+i, sub, n)) {
1658 r++;
1659 i += n;
1660 } else {
1661 i++;
1662 }
1663 }
1664 return PyInt_FromLong((long) r);
1665}
1666
1667
/* Docstring for S.swapcase(). */
static char swapcase__doc__[] =
"S.swapcase() -> string\n"
"\n"
"Return a copy of the string S with uppercase characters\n"
"converted to lowercase and vice versa.";
1673
1674static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001675string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676{
1677 char *s = PyString_AS_STRING(self), *s_new;
1678 int i, n = PyString_GET_SIZE(self);
1679 PyObject *new;
1680
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001681 new = PyString_FromStringAndSize(NULL, n);
1682 if (new == NULL)
1683 return NULL;
1684 s_new = PyString_AsString(new);
1685 for (i = 0; i < n; i++) {
1686 int c = Py_CHARMASK(*s++);
1687 if (islower(c)) {
1688 *s_new = toupper(c);
1689 }
1690 else if (isupper(c)) {
1691 *s_new = tolower(c);
1692 }
1693 else
1694 *s_new = c;
1695 s_new++;
1696 }
1697 return new;
1698}
1699
1700
/* Docstring for S.translate(). */
static char translate__doc__[] =
"S.translate(table [,deletechars]) -> string\n"
"\n"
"Return a copy of the string S, where all characters occurring\n"
"in the optional argument deletechars are removed, and the\n"
"remaining characters have been mapped through the given\n"
"translation table, which must be a string of length 256.";
1708
1709static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001710string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712 register char *input, *output;
1713 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714 register int i, c, changed = 0;
1715 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001716 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717 int inlen, tablen, dellen = 0;
1718 PyObject *result;
1719 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001720 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721
Guido van Rossum4c08d552000-03-10 22:55:18 +00001722 if (!PyArg_ParseTuple(args, "O|O:translate",
1723 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001725
1726 if (PyString_Check(tableobj)) {
1727 table1 = PyString_AS_STRING(tableobj);
1728 tablen = PyString_GET_SIZE(tableobj);
1729 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001730#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001732 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733 parameter; instead a mapping to None will cause characters
1734 to be deleted. */
1735 if (delobj != NULL) {
1736 PyErr_SetString(PyExc_TypeError,
1737 "deletions are implemented differently for unicode");
1738 return NULL;
1739 }
1740 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1741 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001742#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001743 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745
1746 if (delobj != NULL) {
1747 if (PyString_Check(delobj)) {
1748 del_table = PyString_AS_STRING(delobj);
1749 dellen = PyString_GET_SIZE(delobj);
1750 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001751#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 else if (PyUnicode_Check(delobj)) {
1753 PyErr_SetString(PyExc_TypeError,
1754 "deletions are implemented differently for unicode");
1755 return NULL;
1756 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001757#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001758 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1759 return NULL;
1760
1761 if (tablen != 256) {
1762 PyErr_SetString(PyExc_ValueError,
1763 "translation table must be 256 characters long");
1764 return NULL;
1765 }
1766 }
1767 else {
1768 del_table = NULL;
1769 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770 }
1771
1772 table = table1;
1773 inlen = PyString_Size(input_obj);
1774 result = PyString_FromStringAndSize((char *)NULL, inlen);
1775 if (result == NULL)
1776 return NULL;
1777 output_start = output = PyString_AsString(result);
1778 input = PyString_AsString(input_obj);
1779
1780 if (dellen == 0) {
1781 /* If no deletions are required, use faster code */
1782 for (i = inlen; --i >= 0; ) {
1783 c = Py_CHARMASK(*input++);
1784 if (Py_CHARMASK((*output++ = table[c])) != c)
1785 changed = 1;
1786 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001787 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788 return result;
1789 Py_DECREF(result);
1790 Py_INCREF(input_obj);
1791 return input_obj;
1792 }
1793
1794 for (i = 0; i < 256; i++)
1795 trans_table[i] = Py_CHARMASK(table[i]);
1796
1797 for (i = 0; i < dellen; i++)
1798 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1799
1800 for (i = inlen; --i >= 0; ) {
1801 c = Py_CHARMASK(*input++);
1802 if (trans_table[c] != -1)
1803 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1804 continue;
1805 changed = 1;
1806 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001807 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808 Py_DECREF(result);
1809 Py_INCREF(input_obj);
1810 return input_obj;
1811 }
1812 /* Fix the size of the resulting string */
1813 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1814 return NULL;
1815 return result;
1816}
1817
1818
1819/* What follows is used for implementing replace(). Perry Stoll. */
1820
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none.  A pattern longer than MEM is never found, so the
  function returns -1 in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest index at which the pattern still fits inside MEM;
       it cannot start in the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1846
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right and resuming after each match.
  For example mem=1111 with pat=11 yields 2, and
  mem=11111 with pat=11 also yields 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at;

    for (; len >= 0; ++hits) {
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* Continue right behind the occurrence just found. */
        mem += at + pat_len;
        len -= at + pat_len;
    }
    return hits;
}
1870
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "replace all".

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there is nothing to replace, the original string is
   returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would
       be too long to describe with an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,        /* input string */
             const char *pat, int pat_len,    /* pattern string to find */
             const char *sub, int sub_len,    /* substitution string */
             int count,                       /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* A growing replacement can push the result length past INT_MAX.
       Refuse instead of overflowing: a wrapped length would yield an
       undersized buffer that the copies below would overrun.  (A
       shrinking replacement removes at most len bytes, so it is safe.) */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1955
1956
/* Docstring for S.replace().  The optional third argument limits the
   number of replacements, so it is called "count" rather than the
   misleading "maxsplit". */
static char replace__doc__[] =
"S.replace (old, new[, count]) -> string\n"
"\n"
"Return a copy of string S with all occurrences of substring\n"
"old replaced by new. If the optional argument count is\n"
"given, only the first count occurrences are replaced.";
1963
1964static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001965string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001967 const char *str = PyString_AS_STRING(self), *sub, *repl;
1968 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001969 const int len = PyString_GET_SIZE(self);
1970 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001971 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001973 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974
Guido van Rossum4c08d552000-03-10 22:55:18 +00001975 if (!PyArg_ParseTuple(args, "OO|i:replace",
1976 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001978
1979 if (PyString_Check(subobj)) {
1980 sub = PyString_AS_STRING(subobj);
1981 sub_len = PyString_GET_SIZE(subobj);
1982 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001983#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001984 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001985 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001986 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001987#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001988 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1989 return NULL;
1990
1991 if (PyString_Check(replobj)) {
1992 repl = PyString_AS_STRING(replobj);
1993 repl_len = PyString_GET_SIZE(replobj);
1994 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001995#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001996 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001997 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001998 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001999#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002000 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2001 return NULL;
2002
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002003 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002004 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005 return NULL;
2006 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002007 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 if (new_s == NULL) {
2009 PyErr_NoMemory();
2010 return NULL;
2011 }
2012 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002013 if (PyString_CheckExact(self)) {
2014 /* we're returning another reference to self */
2015 new = (PyObject*)self;
2016 Py_INCREF(new);
2017 }
2018 else {
2019 new = PyString_FromStringAndSize(str, len);
2020 if (new == NULL)
2021 return NULL;
2022 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023 }
2024 else {
2025 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002026 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027 }
2028 return new;
2029}
2030
2031
/* Docstring for S.startswith(). */
static char startswith__doc__[] =
"S.startswith(prefix[, start[, end]]) -> int\n"
"\n"
"Return 1 if S starts with the specified prefix, otherwise return 0. With\n"
"optional start, test S beginning at that position. With optional end, stop\n"
"comparing S at that position.";
2038
2039static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002040string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 int plen;
2046 int start = 0;
2047 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049
Guido van Rossumc6821402000-05-08 14:08:05 +00002050 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2051 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 return NULL;
2053 if (PyString_Check(subobj)) {
2054 prefix = PyString_AS_STRING(subobj);
2055 plen = PyString_GET_SIZE(subobj);
2056 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002057#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002058 else if (PyUnicode_Check(subobj)) {
2059 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002060 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002061 subobj, start, end, -1);
2062 if (rc == -1)
2063 return NULL;
2064 else
2065 return PyInt_FromLong((long) rc);
2066 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002067#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002068 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069 return NULL;
2070
2071 /* adopt Java semantics for index out of range. it is legal for
2072 * offset to be == plen, but this only returns true if prefix is
2073 * the empty string.
2074 */
2075 if (start < 0 || start+plen > len)
2076 return PyInt_FromLong(0);
2077
2078 if (!memcmp(str+start, prefix, plen)) {
2079 /* did the match end after the specified end? */
2080 if (end < 0)
2081 return PyInt_FromLong(1);
2082 else if (end - start < plen)
2083 return PyInt_FromLong(0);
2084 else
2085 return PyInt_FromLong(1);
2086 }
2087 else return PyInt_FromLong(0);
2088}
2089
2090
/* Docstring for S.endswith(). */
static char endswith__doc__[] =
"S.endswith(suffix[, start[, end]]) -> int\n"
"\n"
"Return 1 if S ends with the specified suffix, otherwise return 0. With\n"
"optional start, test S beginning at that position. With optional end, stop\n"
"comparing S at that position.";
2097
2098static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002099string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103 const char* suffix;
2104 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105 int start = 0;
2106 int end = -1;
2107 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002108 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109
Guido van Rossumc6821402000-05-08 14:08:05 +00002110 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2111 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002112 return NULL;
2113 if (PyString_Check(subobj)) {
2114 suffix = PyString_AS_STRING(subobj);
2115 slen = PyString_GET_SIZE(subobj);
2116 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002117#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002118 else if (PyUnicode_Check(subobj)) {
2119 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002120 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002121 subobj, start, end, +1);
2122 if (rc == -1)
2123 return NULL;
2124 else
2125 return PyInt_FromLong((long) rc);
2126 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002127#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002128 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129 return NULL;
2130
Guido van Rossum4c08d552000-03-10 22:55:18 +00002131 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 return PyInt_FromLong(0);
2133
2134 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002135 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136
Guido van Rossum4c08d552000-03-10 22:55:18 +00002137 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138 return PyInt_FromLong(1);
2139 else return PyInt_FromLong(0);
2140}
2141
2142
/* Docstring for S.encode(). */
static char encode__doc__[] =
"S.encode([encoding[,errors]]) -> object\n"
"\n"
"Encodes S using the codec registered for encoding. encoding defaults\n"
"to the default encoding. errors may be given to set a different error\n"
"handling scheme. Default is 'strict' meaning that encoding errors raise\n"
"a ValueError. Other possible values are 'ignore' and 'replace'.";
2150
2151static PyObject *
2152string_encode(PyStringObject *self, PyObject *args)
2153{
2154 char *encoding = NULL;
2155 char *errors = NULL;
2156 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2157 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002158 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2159}
2160
2161
/* Docstring for S.decode(). */
static char decode__doc__[] =
"S.decode([encoding[,errors]]) -> object\n"
"\n"
"Decodes S using the codec registered for encoding. encoding defaults\n"
"to the default encoding. errors may be given to set a different error\n"
"handling scheme. Default is 'strict' meaning that encoding errors raise\n"
"a ValueError. Other possible values are 'ignore' and 'replace'.";
2169
2170static PyObject *
2171string_decode(PyStringObject *self, PyObject *args)
2172{
2173 char *encoding = NULL;
2174 char *errors = NULL;
2175 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2176 return NULL;
2177 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002178}
2179
2180
/* Docstring for S.expandtabs(). */
static char expandtabs__doc__[] =
"S.expandtabs([tabsize]) -> string\n"
"\n"
"Return a copy of S where all tab characters are expanded using spaces.\n"
"If tabsize is not given, a tab size of 8 characters is assumed.";
2186
2187static PyObject*
2188string_expandtabs(PyStringObject *self, PyObject *args)
2189{
2190 const char *e, *p;
2191 char *q;
2192 int i, j;
2193 PyObject *u;
2194 int tabsize = 8;
2195
2196 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2197 return NULL;
2198
Thomas Wouters7e474022000-07-16 12:04:32 +00002199 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200 i = j = 0;
2201 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2202 for (p = PyString_AS_STRING(self); p < e; p++)
2203 if (*p == '\t') {
2204 if (tabsize > 0)
2205 j += tabsize - (j % tabsize);
2206 }
2207 else {
2208 j++;
2209 if (*p == '\n' || *p == '\r') {
2210 i += j;
2211 j = 0;
2212 }
2213 }
2214
2215 /* Second pass: create output string and fill it */
2216 u = PyString_FromStringAndSize(NULL, i + j);
2217 if (!u)
2218 return NULL;
2219
2220 j = 0;
2221 q = PyString_AS_STRING(u);
2222
2223 for (p = PyString_AS_STRING(self); p < e; p++)
2224 if (*p == '\t') {
2225 if (tabsize > 0) {
2226 i = tabsize - (j % tabsize);
2227 j += i;
2228 while (i--)
2229 *q++ = ' ';
2230 }
2231 }
2232 else {
2233 j++;
2234 *q++ = *p;
2235 if (*p == '\n' || *p == '\r')
2236 j = 0;
2237 }
2238
2239 return u;
2240}
2241
Tim Peters8fa5dd02001-09-12 02:18:30 +00002242static PyObject *
2243pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002244{
2245 PyObject *u;
2246
2247 if (left < 0)
2248 left = 0;
2249 if (right < 0)
2250 right = 0;
2251
Tim Peters8fa5dd02001-09-12 02:18:30 +00002252 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 Py_INCREF(self);
2254 return (PyObject *)self;
2255 }
2256
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002257 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258 left + PyString_GET_SIZE(self) + right);
2259 if (u) {
2260 if (left)
2261 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002262 memcpy(PyString_AS_STRING(u) + left,
2263 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264 PyString_GET_SIZE(self));
2265 if (right)
2266 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2267 fill, right);
2268 }
2269
2270 return u;
2271}
2272
/* Docstring for S.ljust(). */
static char ljust__doc__[] =
"S.ljust(width) -> string\n\n"
"Return S left justified in a string of length width. Padding is\n"
"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278
2279static PyObject *
2280string_ljust(PyStringObject *self, PyObject *args)
2281{
2282 int width;
2283 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2284 return NULL;
2285
Tim Peters8fa5dd02001-09-12 02:18:30 +00002286 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 Py_INCREF(self);
2288 return (PyObject*) self;
2289 }
2290
2291 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2292}
2293
2294
/* Docstring for S.rjust(). */
static char rjust__doc__[] =
"S.rjust(width) -> string\n\n"
"Return S right justified in a string of length width. Padding is\n"
"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300
2301static PyObject *
2302string_rjust(PyStringObject *self, PyObject *args)
2303{
2304 int width;
2305 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2306 return NULL;
2307
Tim Peters8fa5dd02001-09-12 02:18:30 +00002308 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 Py_INCREF(self);
2310 return (PyObject*) self;
2311 }
2312
2313 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2314}
2315
2316
/* Docstring for S.center(). */
static char center__doc__[] =
"S.center(width) -> string\n\n"
"Return S centered in a string of length width. Padding is done\n"
"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322
2323static PyObject *
2324string_center(PyStringObject *self, PyObject *args)
2325{
2326 int marg, left;
2327 int width;
2328
2329 if (!PyArg_ParseTuple(args, "i:center", &width))
2330 return NULL;
2331
Tim Peters8fa5dd02001-09-12 02:18:30 +00002332 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 Py_INCREF(self);
2334 return (PyObject*) self;
2335 }
2336
2337 marg = width - PyString_GET_SIZE(self);
2338 left = marg / 2 + (marg & width & 1);
2339
2340 return pad(self, left, marg - left, ' ');
2341}
2342
/* Docstring for S.isspace().  States explicitly that an empty string
   yields 0, in line with the isalpha()/isalnum() docstrings. */
static char isspace__doc__[] =
"S.isspace() -> int\n"
"\n"
"Return 1 if all characters in S are whitespace\n"
"and there is at least one character in S, 0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348
2349static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002350string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351{
Fred Drakeba096332000-07-09 07:04:36 +00002352 register const unsigned char *p
2353 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002354 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 /* Shortcut for single character strings */
2357 if (PyString_GET_SIZE(self) == 1 &&
2358 isspace(*p))
2359 return PyInt_FromLong(1);
2360
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002361 /* Special case for empty strings */
2362 if (PyString_GET_SIZE(self) == 0)
2363 return PyInt_FromLong(0);
2364
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 e = p + PyString_GET_SIZE(self);
2366 for (; p < e; p++) {
2367 if (!isspace(*p))
2368 return PyInt_FromLong(0);
2369 }
2370 return PyInt_FromLong(1);
2371}
2372
2373
/* Docstring for S.isalpha(). */
static char isalpha__doc__[] =
"S.isalpha() -> int\n"
"\n"
"Return 1 if all characters in S are alphabetic\n"
"and there is at least one character in S, 0 otherwise.";
2379
2380static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002381string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002382{
Fred Drakeba096332000-07-09 07:04:36 +00002383 register const unsigned char *p
2384 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002385 register const unsigned char *e;
2386
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002387 /* Shortcut for single character strings */
2388 if (PyString_GET_SIZE(self) == 1 &&
2389 isalpha(*p))
2390 return PyInt_FromLong(1);
2391
2392 /* Special case for empty strings */
2393 if (PyString_GET_SIZE(self) == 0)
2394 return PyInt_FromLong(0);
2395
2396 e = p + PyString_GET_SIZE(self);
2397 for (; p < e; p++) {
2398 if (!isalpha(*p))
2399 return PyInt_FromLong(0);
2400 }
2401 return PyInt_FromLong(1);
2402}
2403
2404
/* Docstring for S.isalnum(). */
static char isalnum__doc__[] =
"S.isalnum() -> int\n"
"\n"
"Return 1 if all characters in S are alphanumeric\n"
"and there is at least one character in S, 0 otherwise.";
2410
2411static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002412string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002413{
Fred Drakeba096332000-07-09 07:04:36 +00002414 register const unsigned char *p
2415 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002416 register const unsigned char *e;
2417
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002418 /* Shortcut for single character strings */
2419 if (PyString_GET_SIZE(self) == 1 &&
2420 isalnum(*p))
2421 return PyInt_FromLong(1);
2422
2423 /* Special case for empty strings */
2424 if (PyString_GET_SIZE(self) == 0)
2425 return PyInt_FromLong(0);
2426
2427 e = p + PyString_GET_SIZE(self);
2428 for (; p < e; p++) {
2429 if (!isalnum(*p))
2430 return PyInt_FromLong(0);
2431 }
2432 return PyInt_FromLong(1);
2433}
2434
2435
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436static char isdigit__doc__[] =
2437"S.isdigit() -> int\n\
2438\n\
2439Return 1 if there are only digit characters in S,\n\
24400 otherwise.";
2441
2442static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002443string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444{
Fred Drakeba096332000-07-09 07:04:36 +00002445 register const unsigned char *p
2446 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002447 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 /* Shortcut for single character strings */
2450 if (PyString_GET_SIZE(self) == 1 &&
2451 isdigit(*p))
2452 return PyInt_FromLong(1);
2453
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002454 /* Special case for empty strings */
2455 if (PyString_GET_SIZE(self) == 0)
2456 return PyInt_FromLong(0);
2457
Guido van Rossum4c08d552000-03-10 22:55:18 +00002458 e = p + PyString_GET_SIZE(self);
2459 for (; p < e; p++) {
2460 if (!isdigit(*p))
2461 return PyInt_FromLong(0);
2462 }
2463 return PyInt_FromLong(1);
2464}
2465
2466
2467static char islower__doc__[] =
2468"S.islower() -> int\n\
2469\n\
2470Return 1 if all cased characters in S are lowercase and there is\n\
2471at least one cased character in S, 0 otherwise.";
2472
2473static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002474string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002475{
Fred Drakeba096332000-07-09 07:04:36 +00002476 register const unsigned char *p
2477 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002478 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002479 int cased;
2480
Guido van Rossum4c08d552000-03-10 22:55:18 +00002481 /* Shortcut for single character strings */
2482 if (PyString_GET_SIZE(self) == 1)
2483 return PyInt_FromLong(islower(*p) != 0);
2484
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002485 /* Special case for empty strings */
2486 if (PyString_GET_SIZE(self) == 0)
2487 return PyInt_FromLong(0);
2488
Guido van Rossum4c08d552000-03-10 22:55:18 +00002489 e = p + PyString_GET_SIZE(self);
2490 cased = 0;
2491 for (; p < e; p++) {
2492 if (isupper(*p))
2493 return PyInt_FromLong(0);
2494 else if (!cased && islower(*p))
2495 cased = 1;
2496 }
2497 return PyInt_FromLong(cased);
2498}
2499
2500
2501static char isupper__doc__[] =
2502"S.isupper() -> int\n\
2503\n\
2504Return 1 if all cased characters in S are uppercase and there is\n\
2505at least one cased character in S, 0 otherwise.";
2506
2507static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002508string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509{
Fred Drakeba096332000-07-09 07:04:36 +00002510 register const unsigned char *p
2511 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002512 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 int cased;
2514
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 /* Shortcut for single character strings */
2516 if (PyString_GET_SIZE(self) == 1)
2517 return PyInt_FromLong(isupper(*p) != 0);
2518
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002519 /* Special case for empty strings */
2520 if (PyString_GET_SIZE(self) == 0)
2521 return PyInt_FromLong(0);
2522
Guido van Rossum4c08d552000-03-10 22:55:18 +00002523 e = p + PyString_GET_SIZE(self);
2524 cased = 0;
2525 for (; p < e; p++) {
2526 if (islower(*p))
2527 return PyInt_FromLong(0);
2528 else if (!cased && isupper(*p))
2529 cased = 1;
2530 }
2531 return PyInt_FromLong(cased);
2532}
2533
2534
2535static char istitle__doc__[] =
2536"S.istitle() -> int\n\
2537\n\
2538Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2539may only follow uncased characters and lowercase characters only cased\n\
2540ones. Return 0 otherwise.";
2541
2542static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002543string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544{
Fred Drakeba096332000-07-09 07:04:36 +00002545 register const unsigned char *p
2546 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002547 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002548 int cased, previous_is_cased;
2549
Guido van Rossum4c08d552000-03-10 22:55:18 +00002550 /* Shortcut for single character strings */
2551 if (PyString_GET_SIZE(self) == 1)
2552 return PyInt_FromLong(isupper(*p) != 0);
2553
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002554 /* Special case for empty strings */
2555 if (PyString_GET_SIZE(self) == 0)
2556 return PyInt_FromLong(0);
2557
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558 e = p + PyString_GET_SIZE(self);
2559 cased = 0;
2560 previous_is_cased = 0;
2561 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002562 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563
2564 if (isupper(ch)) {
2565 if (previous_is_cased)
2566 return PyInt_FromLong(0);
2567 previous_is_cased = 1;
2568 cased = 1;
2569 }
2570 else if (islower(ch)) {
2571 if (!previous_is_cased)
2572 return PyInt_FromLong(0);
2573 previous_is_cased = 1;
2574 cased = 1;
2575 }
2576 else
2577 previous_is_cased = 0;
2578 }
2579 return PyInt_FromLong(cased);
2580}
2581
2582
2583static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002584"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002585\n\
2586Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002587Line breaks are not included in the resulting list unless keepends\n\
2588is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002589
2590#define SPLIT_APPEND(data, left, right) \
2591 str = PyString_FromStringAndSize(data + left, right - left); \
2592 if (!str) \
2593 goto onError; \
2594 if (PyList_Append(list, str)) { \
2595 Py_DECREF(str); \
2596 goto onError; \
2597 } \
2598 else \
2599 Py_DECREF(str);
2600
2601static PyObject*
2602string_splitlines(PyStringObject *self, PyObject *args)
2603{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 register int i;
2605 register int j;
2606 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002607 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002608 PyObject *list;
2609 PyObject *str;
2610 char *data;
2611
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002612 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002613 return NULL;
2614
2615 data = PyString_AS_STRING(self);
2616 len = PyString_GET_SIZE(self);
2617
Guido van Rossum4c08d552000-03-10 22:55:18 +00002618 list = PyList_New(0);
2619 if (!list)
2620 goto onError;
2621
2622 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002623 int eol;
2624
Guido van Rossum4c08d552000-03-10 22:55:18 +00002625 /* Find a line and append it */
2626 while (i < len && data[i] != '\n' && data[i] != '\r')
2627 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628
2629 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002630 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631 if (i < len) {
2632 if (data[i] == '\r' && i + 1 < len &&
2633 data[i+1] == '\n')
2634 i += 2;
2635 else
2636 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002637 if (keepends)
2638 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002639 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002640 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 j = i;
2642 }
2643 if (j < len) {
2644 SPLIT_APPEND(data, j, len);
2645 }
2646
2647 return list;
2648
2649 onError:
2650 Py_DECREF(list);
2651 return NULL;
2652}
2653
2654#undef SPLIT_APPEND
2655
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002656
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002657static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002659 /* Counterparts of the obsolete stropmodule functions; except
2660 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002661 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2662 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2663 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2664 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2665 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2666 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2667 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2668 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2669 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2670 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2671 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2672 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2673 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2674 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2675 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2676 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2677 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2678 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2679 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2680 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2681 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2682 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2683 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2684 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2685 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2686 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2687 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2688 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2689 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2690 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2691 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2692 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2693 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002694#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002695 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002696#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002697 {NULL, NULL} /* sentinel */
2698};
2699
Guido van Rossumae960af2001-08-30 03:11:59 +00002700staticforward PyObject *
2701str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2702
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002703static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002704string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002705{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002706 PyObject *x = NULL;
2707 static char *kwlist[] = {"object", 0};
2708
Guido van Rossumae960af2001-08-30 03:11:59 +00002709 if (type != &PyString_Type)
2710 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002711 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2712 return NULL;
2713 if (x == NULL)
2714 return PyString_FromString("");
2715 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002716}
2717
Guido van Rossumae960af2001-08-30 03:11:59 +00002718static PyObject *
2719str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2720{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002721 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002722 int n;
2723
2724 assert(PyType_IsSubtype(type, &PyString_Type));
2725 tmp = string_new(&PyString_Type, args, kwds);
2726 if (tmp == NULL)
2727 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002728 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002729 n = PyString_GET_SIZE(tmp);
2730 pnew = type->tp_alloc(type, n);
2731 if (pnew != NULL) {
2732 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2733#ifdef CACHE_HASH
2734 ((PyStringObject *)pnew)->ob_shash =
2735 ((PyStringObject *)tmp)->ob_shash;
2736#endif
2737#ifdef INTERN_STRINGS
2738 ((PyStringObject *)pnew)->ob_sinterned =
2739 ((PyStringObject *)tmp)->ob_sinterned;
2740#endif
2741 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002742 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002743 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002744}
2745
Tim Peters6d6c1a32001-08-02 04:15:00 +00002746static char string_doc[] =
2747"str(object) -> string\n\
2748\n\
2749Return a nice string representation of the object.\n\
2750If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002751
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002752PyTypeObject PyString_Type = {
2753 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002754 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002755 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002757 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002758 (destructor)string_dealloc, /* tp_dealloc */
2759 (printfunc)string_print, /* tp_print */
2760 0, /* tp_getattr */
2761 0, /* tp_setattr */
2762 0, /* tp_compare */
2763 (reprfunc)string_repr, /* tp_repr */
2764 0, /* tp_as_number */
2765 &string_as_sequence, /* tp_as_sequence */
2766 0, /* tp_as_mapping */
2767 (hashfunc)string_hash, /* tp_hash */
2768 0, /* tp_call */
2769 (reprfunc)string_str, /* tp_str */
2770 PyObject_GenericGetAttr, /* tp_getattro */
2771 0, /* tp_setattro */
2772 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002773 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002774 string_doc, /* tp_doc */
2775 0, /* tp_traverse */
2776 0, /* tp_clear */
2777 (richcmpfunc)string_richcompare, /* tp_richcompare */
2778 0, /* tp_weaklistoffset */
2779 0, /* tp_iter */
2780 0, /* tp_iternext */
2781 string_methods, /* tp_methods */
2782 0, /* tp_members */
2783 0, /* tp_getset */
2784 0, /* tp_base */
2785 0, /* tp_dict */
2786 0, /* tp_descr_get */
2787 0, /* tp_descr_set */
2788 0, /* tp_dictoffset */
2789 0, /* tp_init */
2790 0, /* tp_alloc */
2791 string_new, /* tp_new */
Guido van Rossum9475a232001-10-05 20:51:39 +00002792 _PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002793};
2794
2795void
Fred Drakeba096332000-07-09 07:04:36 +00002796PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002797{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002799 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002800 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002801 if (w == NULL || !PyString_Check(*pv)) {
2802 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002803 *pv = NULL;
2804 return;
2805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002806 v = string_concat((PyStringObject *) *pv, w);
2807 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002808 *pv = v;
2809}
2810
Guido van Rossum013142a1994-08-30 08:19:36 +00002811void
Fred Drakeba096332000-07-09 07:04:36 +00002812PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002813{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002814 PyString_Concat(pv, w);
2815 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002816}
2817
2818
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002819/* The following function breaks the notion that strings are immutable:
2820 it changes the size of a string. We get away with this only if there
2821 is only one module referencing the object. You can also think of it
2822 as creating a new string object and destroying the old one, only
2823 more efficiently. In any case, don't use this if the string may
2824 already be known to some other part of the code... */
2825
2826int
Fred Drakeba096332000-07-09 07:04:36 +00002827_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002828{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002829 register PyObject *v;
2830 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002831 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002832 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002833 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002834 Py_DECREF(v);
2835 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002836 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002837 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002838 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002839#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002840 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002841#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002842 _Py_ForgetReference(v);
2843 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002844 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002845 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002846 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002847 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002848 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002849 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002850 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002851 _Py_NewReference(*pv);
2852 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002853 sv->ob_size = newsize;
2854 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002855 return 0;
2856}
Guido van Rossume5372401993-03-16 12:15:04 +00002857
2858/* Helpers for formatstring */
2859
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002860static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002861getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002862{
2863 int argidx = *p_argidx;
2864 if (argidx < arglen) {
2865 (*p_argidx)++;
2866 if (arglen < 0)
2867 return args;
2868 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002869 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002870 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002871 PyErr_SetString(PyExc_TypeError,
2872 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002873 return NULL;
2874}
2875
/* Bit flags recording which format flag characters were given */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
2888
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002889static int
Fred Drakeba096332000-07-09 07:04:36 +00002890formatfloat(char *buf, size_t buflen, int flags,
2891 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002892{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002893 /* fmt = '%#.' + `prec` + `type`
2894 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002895 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002896 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002897 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002898 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002899 if (prec < 0)
2900 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002901 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2902 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002903 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2904 (flags&F_ALT) ? "#" : "",
2905 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002906 /* worst case length calc to ensure no buffer overrun:
2907 fmt = %#.<prec>g
2908 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002909 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002910 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2911 If prec=0 the effective precision is 1 (the leading digit is
2912 always given), therefore increase by one to 10+prec. */
2913 if (buflen <= (size_t)10 + (size_t)prec) {
2914 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002915 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002916 return -1;
2917 }
Tim Peters885d4572001-11-28 20:27:42 +00002918 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002919 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002920}
2921
Tim Peters38fd5b62000-09-21 05:43:11 +00002922/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2923 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2924 * Python's regular ints.
2925 * Return value: a new PyString*, or NULL if error.
2926 * . *pbuf is set to point into it,
2927 * *plen set to the # of chars following that.
2928 * Caller must decref it when done using pbuf.
2929 * The string starting at *pbuf is of the form
2930 * "-"? ("0x" | "0X")? digit+
2931 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002932 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002933 * There will be at least prec digits, zero-filled on the left if
2934 * necessary to get that many.
2935 * val object to be converted
2936 * flags bitmask of format flags; only F_ALT is looked at
2937 * prec minimum number of digits; 0-fill on left if needed
2938 * type a character in [duoxX]; u acts the same as d
2939 *
2940 * CAUTION: o, x and X conversions on regular ints can never
2941 * produce a '-' sign, but can for Python's unbounded ints.
2942 */
2943PyObject*
2944_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2945 char **pbuf, int *plen)
2946{
2947 PyObject *result = NULL;
2948 char *buf;
2949 int i;
2950 int sign; /* 1 if '-', else 0 */
2951 int len; /* number of characters */
2952 int numdigits; /* len == numnondigits + numdigits */
2953 int numnondigits = 0;
2954
2955 switch (type) {
2956 case 'd':
2957 case 'u':
2958 result = val->ob_type->tp_str(val);
2959 break;
2960 case 'o':
2961 result = val->ob_type->tp_as_number->nb_oct(val);
2962 break;
2963 case 'x':
2964 case 'X':
2965 numnondigits = 2;
2966 result = val->ob_type->tp_as_number->nb_hex(val);
2967 break;
2968 default:
2969 assert(!"'type' not in [duoxX]");
2970 }
2971 if (!result)
2972 return NULL;
2973
2974 /* To modify the string in-place, there can only be one reference. */
2975 if (result->ob_refcnt != 1) {
2976 PyErr_BadInternalCall();
2977 return NULL;
2978 }
2979 buf = PyString_AsString(result);
2980 len = PyString_Size(result);
2981 if (buf[len-1] == 'L') {
2982 --len;
2983 buf[len] = '\0';
2984 }
2985 sign = buf[0] == '-';
2986 numnondigits += sign;
2987 numdigits = len - numnondigits;
2988 assert(numdigits > 0);
2989
Tim Petersfff53252001-04-12 18:38:48 +00002990 /* Get rid of base marker unless F_ALT */
2991 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002992 /* Need to skip 0x, 0X or 0. */
2993 int skipped = 0;
2994 switch (type) {
2995 case 'o':
2996 assert(buf[sign] == '0');
2997 /* If 0 is only digit, leave it alone. */
2998 if (numdigits > 1) {
2999 skipped = 1;
3000 --numdigits;
3001 }
3002 break;
3003 case 'x':
3004 case 'X':
3005 assert(buf[sign] == '0');
3006 assert(buf[sign + 1] == 'x');
3007 skipped = 2;
3008 numnondigits -= 2;
3009 break;
3010 }
3011 if (skipped) {
3012 buf += skipped;
3013 len -= skipped;
3014 if (sign)
3015 buf[0] = '-';
3016 }
3017 assert(len == numnondigits + numdigits);
3018 assert(numdigits > 0);
3019 }
3020
3021 /* Fill with leading zeroes to meet minimum width. */
3022 if (prec > numdigits) {
3023 PyObject *r1 = PyString_FromStringAndSize(NULL,
3024 numnondigits + prec);
3025 char *b1;
3026 if (!r1) {
3027 Py_DECREF(result);
3028 return NULL;
3029 }
3030 b1 = PyString_AS_STRING(r1);
3031 for (i = 0; i < numnondigits; ++i)
3032 *b1++ = *buf++;
3033 for (i = 0; i < prec - numdigits; i++)
3034 *b1++ = '0';
3035 for (i = 0; i < numdigits; i++)
3036 *b1++ = *buf++;
3037 *b1 = '\0';
3038 Py_DECREF(result);
3039 result = r1;
3040 buf = PyString_AS_STRING(result);
3041 len = numnondigits + prec;
3042 }
3043
3044 /* Fix up case for hex conversions. */
3045 switch (type) {
3046 case 'x':
3047 /* Need to convert all upper case letters to lower case. */
3048 for (i = 0; i < len; i++)
3049 if (buf[i] >= 'A' && buf[i] <= 'F')
3050 buf[i] += 'a'-'A';
3051 break;
3052 case 'X':
3053 /* Need to convert 0x to 0X (and -0x to -0X). */
3054 if (buf[sign + 1] == 'x')
3055 buf[sign + 1] = 'X';
3056 break;
3057 }
3058 *pbuf = buf;
3059 *plen = len;
3060 return result;
3061}
3062
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003063static int
Fred Drakeba096332000-07-09 07:04:36 +00003064formatint(char *buf, size_t buflen, int flags,
3065 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003066{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003067 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003068 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3069 + 1 + 1 = 24 */
3070 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003071 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003072 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003073 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003074 if (prec < 0)
3075 prec = 1;
Tim Peters885d4572001-11-28 20:27:42 +00003076 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3077 (flags&F_ALT) ? "#" : "",
3078 prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003079 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003080 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003081 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003082 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003083 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003084 return -1;
3085 }
Tim Peters885d4572001-11-28 20:27:42 +00003086 PyOS_snprintf(buf, buflen, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003087 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3088 * but we want it (for consistency with other %#x conversions, and
3089 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003090 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3091 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3092 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003093 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003094 if (x == 0 &&
3095 (flags & F_ALT) &&
3096 (type == 'x' || type == 'X') &&
3097 buf[1] != (char)type) /* this last always true under std C */
3098 {
Tim Petersfff53252001-04-12 18:38:48 +00003099 memmove(buf+2, buf, strlen(buf) + 1);
3100 buf[0] = '0';
3101 buf[1] = (char)type;
3102 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003103 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003104}
3105
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003106static int
Fred Drakeba096332000-07-09 07:04:36 +00003107formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003108{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003109 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003110 if (PyString_Check(v)) {
3111 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003112 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003113 }
3114 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003115 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003116 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003117 }
3118 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003119 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003120}
3121
Guido van Rossum013142a1994-08-30 08:19:36 +00003122
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX The value is a magic number.  Every
   formatting routine checks the bounds itself so the buffer cannot
   overflow; allocating a buffer of the appropriate size per format might
   be a better solution, but the current one is sufficient for now.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003132
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003133PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003134PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003135{
3136 char *fmt, *res;
3137 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003138 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003139 PyObject *result, *orig_args;
3140#ifdef Py_USING_UNICODE
3141 PyObject *v, *w;
3142#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003143 PyObject *dict = NULL;
3144 if (format == NULL || !PyString_Check(format) || args == NULL) {
3145 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003146 return NULL;
3147 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003148 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003149 fmt = PyString_AS_STRING(format);
3150 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003151 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003152 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003153 if (result == NULL)
3154 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003155 res = PyString_AsString(result);
3156 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003157 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003158 argidx = 0;
3159 }
3160 else {
3161 arglen = -1;
3162 argidx = -2;
3163 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003164 if (args->ob_type->tp_as_mapping)
3165 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003166 while (--fmtcnt >= 0) {
3167 if (*fmt != '%') {
3168 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003169 rescnt = fmtcnt + 100;
3170 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003171 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003172 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003173 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003174 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003175 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003176 }
3177 *res++ = *fmt++;
3178 }
3179 else {
3180 /* Got a format specifier */
3181 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003182 int width = -1;
3183 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003184 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003185 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003186 PyObject *v = NULL;
3187 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003188 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003189 int sign;
3190 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003191 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003192#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003193 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003194 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003195#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003196
Guido van Rossumda9c2711996-12-05 21:58:58 +00003197 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003198 if (*fmt == '(') {
3199 char *keystart;
3200 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003201 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003202 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003203
3204 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003205 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003206 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003207 goto error;
3208 }
3209 ++fmt;
3210 --fmtcnt;
3211 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003212 /* Skip over balanced parentheses */
3213 while (pcount > 0 && --fmtcnt >= 0) {
3214 if (*fmt == ')')
3215 --pcount;
3216 else if (*fmt == '(')
3217 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003218 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003219 }
3220 keylen = fmt - keystart - 1;
3221 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003222 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003223 "incomplete format key");
3224 goto error;
3225 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003226 key = PyString_FromStringAndSize(keystart,
3227 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003228 if (key == NULL)
3229 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003230 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003231 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003232 args_owned = 0;
3233 }
3234 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003235 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003236 if (args == NULL) {
3237 goto error;
3238 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003239 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003240 arglen = -1;
3241 argidx = -2;
3242 }
Guido van Rossume5372401993-03-16 12:15:04 +00003243 while (--fmtcnt >= 0) {
3244 switch (c = *fmt++) {
3245 case '-': flags |= F_LJUST; continue;
3246 case '+': flags |= F_SIGN; continue;
3247 case ' ': flags |= F_BLANK; continue;
3248 case '#': flags |= F_ALT; continue;
3249 case '0': flags |= F_ZERO; continue;
3250 }
3251 break;
3252 }
3253 if (c == '*') {
3254 v = getnextarg(args, arglen, &argidx);
3255 if (v == NULL)
3256 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003257 if (!PyInt_Check(v)) {
3258 PyErr_SetString(PyExc_TypeError,
3259 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003260 goto error;
3261 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003262 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003263 if (width < 0) {
3264 flags |= F_LJUST;
3265 width = -width;
3266 }
Guido van Rossume5372401993-03-16 12:15:04 +00003267 if (--fmtcnt >= 0)
3268 c = *fmt++;
3269 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003270 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003271 width = c - '0';
3272 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003273 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003274 if (!isdigit(c))
3275 break;
3276 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003277 PyErr_SetString(
3278 PyExc_ValueError,
3279 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003280 goto error;
3281 }
3282 width = width*10 + (c - '0');
3283 }
3284 }
3285 if (c == '.') {
3286 prec = 0;
3287 if (--fmtcnt >= 0)
3288 c = *fmt++;
3289 if (c == '*') {
3290 v = getnextarg(args, arglen, &argidx);
3291 if (v == NULL)
3292 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003293 if (!PyInt_Check(v)) {
3294 PyErr_SetString(
3295 PyExc_TypeError,
3296 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003297 goto error;
3298 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003299 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003300 if (prec < 0)
3301 prec = 0;
3302 if (--fmtcnt >= 0)
3303 c = *fmt++;
3304 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003305 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003306 prec = c - '0';
3307 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003308 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003309 if (!isdigit(c))
3310 break;
3311 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003312 PyErr_SetString(
3313 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003314 "prec too big");
3315 goto error;
3316 }
3317 prec = prec*10 + (c - '0');
3318 }
3319 }
3320 } /* prec */
3321 if (fmtcnt >= 0) {
3322 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003323 if (--fmtcnt >= 0)
3324 c = *fmt++;
3325 }
3326 }
3327 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003328 PyErr_SetString(PyExc_ValueError,
3329 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003330 goto error;
3331 }
3332 if (c != '%') {
3333 v = getnextarg(args, arglen, &argidx);
3334 if (v == NULL)
3335 goto error;
3336 }
3337 sign = 0;
3338 fill = ' ';
3339 switch (c) {
3340 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003341 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003342 len = 1;
3343 break;
3344 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003345 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003346#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003347 if (PyUnicode_Check(v)) {
3348 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003349 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003350 goto unicode;
3351 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003352#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003353 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003354 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003355 else
3356 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003357 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003358 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003359 if (!PyString_Check(temp)) {
3360 PyErr_SetString(PyExc_TypeError,
3361 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003362 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003363 goto error;
3364 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003365 pbuf = PyString_AS_STRING(temp);
3366 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003367 if (prec >= 0 && len > prec)
3368 len = prec;
3369 break;
3370 case 'i':
3371 case 'd':
3372 case 'u':
3373 case 'o':
3374 case 'x':
3375 case 'X':
3376 if (c == 'i')
3377 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003378 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003379 temp = _PyString_FormatLong(v, flags,
3380 prec, c, &pbuf, &len);
3381 if (!temp)
3382 goto error;
3383 /* unbounded ints can always produce
3384 a sign character! */
3385 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003386 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003387 else {
3388 pbuf = formatbuf;
3389 len = formatint(pbuf, sizeof(formatbuf),
3390 flags, prec, c, v);
3391 if (len < 0)
3392 goto error;
3393 /* only d conversion is signed */
3394 sign = c == 'd';
3395 }
3396 if (flags & F_ZERO)
3397 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003398 break;
3399 case 'e':
3400 case 'E':
3401 case 'f':
3402 case 'g':
3403 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003404 pbuf = formatbuf;
3405 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003406 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003407 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003408 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003409 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003410 fill = '0';
3411 break;
3412 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003413 pbuf = formatbuf;
3414 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003415 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003416 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003417 break;
3418 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003419 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003420 "unsupported format character '%c' (0x%x) "
3421 "at index %i",
3422 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003423 goto error;
3424 }
3425 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003426 if (*pbuf == '-' || *pbuf == '+') {
3427 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003428 len--;
3429 }
3430 else if (flags & F_SIGN)
3431 sign = '+';
3432 else if (flags & F_BLANK)
3433 sign = ' ';
3434 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003435 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003436 }
3437 if (width < len)
3438 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003439 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003440 reslen -= rescnt;
3441 rescnt = width + fmtcnt + 100;
3442 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003443 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003444 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003445 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003446 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003447 }
3448 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003449 if (fill != ' ')
3450 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003451 rescnt--;
3452 if (width > len)
3453 width--;
3454 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003455 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3456 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003457 assert(pbuf[1] == c);
3458 if (fill != ' ') {
3459 *res++ = *pbuf++;
3460 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003461 }
Tim Petersfff53252001-04-12 18:38:48 +00003462 rescnt -= 2;
3463 width -= 2;
3464 if (width < 0)
3465 width = 0;
3466 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003467 }
3468 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003469 do {
3470 --rescnt;
3471 *res++ = fill;
3472 } while (--width > len);
3473 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003474 if (fill == ' ') {
3475 if (sign)
3476 *res++ = sign;
3477 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003478 (c == 'x' || c == 'X')) {
3479 assert(pbuf[0] == '0');
3480 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003481 *res++ = *pbuf++;
3482 *res++ = *pbuf++;
3483 }
3484 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003485 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003486 res += len;
3487 rescnt -= len;
3488 while (--width >= len) {
3489 --rescnt;
3490 *res++ = ' ';
3491 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003492 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003493 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003494 "not all arguments converted");
3495 goto error;
3496 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003497 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003498 } /* '%' */
3499 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003500 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003501 PyErr_SetString(PyExc_TypeError,
3502 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003503 goto error;
3504 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003505 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003506 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003507 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003508 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003509 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003510
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003511#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003512 unicode:
3513 if (args_owned) {
3514 Py_DECREF(args);
3515 args_owned = 0;
3516 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003517 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003518 if (PyTuple_Check(orig_args) && argidx > 0) {
3519 PyObject *v;
3520 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3521 v = PyTuple_New(n);
3522 if (v == NULL)
3523 goto error;
3524 while (--n >= 0) {
3525 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3526 Py_INCREF(w);
3527 PyTuple_SET_ITEM(v, n, w);
3528 }
3529 args = v;
3530 } else {
3531 Py_INCREF(orig_args);
3532 args = orig_args;
3533 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003534 args_owned = 1;
3535 /* Take what we have of the result and let the Unicode formatting
3536 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003537 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003538 if (_PyString_Resize(&result, rescnt))
3539 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003540 fmtcnt = PyString_GET_SIZE(format) - \
3541 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003542 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3543 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003544 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003545 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003546 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003547 if (v == NULL)
3548 goto error;
3549 /* Paste what we have (result) to what the Unicode formatting
3550 function returned (v) and return the result (or error) */
3551 w = PyUnicode_Concat(result, v);
3552 Py_DECREF(result);
3553 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003554 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003555 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003556#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003557
Guido van Rossume5372401993-03-16 12:15:04 +00003558 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003559 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003560 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003561 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003562 }
Guido van Rossume5372401993-03-16 12:15:04 +00003563 return NULL;
3564}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003565
3566
3567#ifdef INTERN_STRINGS
3568
/* Table of interned strings; each entry maps an interned string object
 * to itself (key == value).  Created lazily by PyString_InternInPlace().
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3582
3583void
Fred Drakeba096332000-07-09 07:04:36 +00003584PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003585{
3586 register PyStringObject *s = (PyStringObject *)(*p);
3587 PyObject *t;
3588 if (s == NULL || !PyString_Check(s))
3589 Py_FatalError("PyString_InternInPlace: strings only please!");
3590 if ((t = s->ob_sinterned) != NULL) {
3591 if (t == (PyObject *)s)
3592 return;
3593 Py_INCREF(t);
3594 *p = t;
3595 Py_DECREF(s);
3596 return;
3597 }
3598 if (interned == NULL) {
3599 interned = PyDict_New();
3600 if (interned == NULL)
3601 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003602 }
3603 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3604 Py_INCREF(t);
3605 *p = s->ob_sinterned = t;
3606 Py_DECREF(s);
3607 return;
3608 }
Tim Peters111f6092001-09-12 07:54:51 +00003609 /* Ensure that only true string objects appear in the intern dict,
3610 and as the value of ob_sinterned. */
3611 if (PyString_CheckExact(s)) {
3612 t = (PyObject *)s;
3613 if (PyDict_SetItem(interned, t, t) == 0) {
3614 s->ob_sinterned = t;
3615 return;
3616 }
3617 }
3618 else {
3619 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3620 PyString_GET_SIZE(s));
3621 if (t != NULL) {
3622 if (PyDict_SetItem(interned, t, t) == 0) {
3623 *p = s->ob_sinterned = t;
3624 Py_DECREF(s);
3625 return;
3626 }
3627 Py_DECREF(t);
3628 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003629 }
3630 PyErr_Clear();
3631}
3632
3633
3634PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003635PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003636{
3637 PyObject *s = PyString_FromString(cp);
3638 if (s == NULL)
3639 return NULL;
3640 PyString_InternInPlace(&s);
3641 return s;
3642}
3643
3644#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003645
3646void
Fred Drakeba096332000-07-09 07:04:36 +00003647PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003648{
3649 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003650 for (i = 0; i < UCHAR_MAX + 1; i++) {
3651 Py_XDECREF(characters[i]);
3652 characters[i] = NULL;
3653 }
3654#ifndef DONT_SHARE_SHORT_STRINGS
3655 Py_XDECREF(nullstring);
3656 nullstring = NULL;
3657#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003658#ifdef INTERN_STRINGS
3659 if (interned) {
3660 int pos, changed;
3661 PyObject *key, *value;
3662 do {
3663 changed = 0;
3664 pos = 0;
3665 while (PyDict_Next(interned, &pos, &key, &value)) {
3666 if (key->ob_refcnt == 2 && key == value) {
3667 PyDict_DelItem(interned, key);
3668 changed = 1;
3669 }
3670 }
3671 } while (changed);
3672 }
3673#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003674}
Barry Warsawa903ad982001-02-23 16:40:48 +00003675
3676#ifdef INTERN_STRINGS
3677void _Py_ReleaseInternedStrings(void)
3678{
3679 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003680 fprintf(stderr, "releasing interned strings\n");
3681 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003682 Py_DECREF(interned);
3683 interned = NULL;
3684 }
3685}
3686#endif /* INTERN_STRINGS */