blob: 46bd99adb5aac63ff5edc6b203d93c430865b8ab [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time a request is satisfied by the
   shared empty string or by a shared one-character string. */
int null_strings, one_strings;
#endif

/* Fallback definition, used only when HAVE_LIMITS_H is not defined and
   nothing else has supplied UCHAR_MAX. */
#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
#define UCHAR_MAX 255
#endif

/* Cache of one-character string objects, indexed by byte value. */
static PyStringObject *characters[UCHAR_MAX + 1];
#ifndef DONT_SHARE_SHORT_STRINGS
/* The shared empty string object; NULL until the first one is created. */
static PyStringObject *nullstring;
#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
Martin v. Löwisd1327502001-12-02 18:09:41 +000022 PyString_FromStringAndSize() and PyString_FromString() try in certain cases
23 to share string objects. When the size of the string is zero, these
24 routines always return a pointer to the same string object; when the size
25 is one, they return a pointer to an already existing object if the contents
26 of the string is known. For PyString_FromString() this is always the case,
27 for PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
29
30 A common practice of allocating a string and then filling it in or changing
31 it must be done carefully. It is only allowed to change the contents of
32 the string if the object was gotten from PyString_FromStringAndSize() with
33 a NULL first argument, because in the future these routines may try to do
34 even more sharing of objects.
35
36 The parameter `size' denotes number of characters to allocate, not counting
37 the null terminating character. If the `str' argument is not NULL, then it
38 must point to a null-terminated string of length `size'.
39
40 The member `op->ob_size' denotes the number of bytes of data in the string,
41 not counting the null terminating character, and is therefore equal to the
42 `size' parameter.
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000048#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000049 if (size == 0 && (op = nullstring) != NULL) {
50#ifdef COUNT_ALLOCS
51 null_strings++;
52#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053 Py_INCREF(op);
54 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000056 if (size == 1 && str != NULL &&
57 (op = characters[*str & UCHAR_MAX]) != NULL)
58 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059#ifdef COUNT_ALLOCS
60 one_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000065#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000066
67 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000069 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000070 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000072 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073#ifdef CACHE_HASH
74 op->ob_shash = -1;
75#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000076#ifdef INTERN_STRINGS
77 op->ob_sinterned = NULL;
78#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (str != NULL)
80 memcpy(op->ob_sval, str, size);
81 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000090 PyObject *t = (PyObject *)op;
91 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000092 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000103 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000105 if (size > INT_MAX) {
106 PyErr_SetString(PyExc_OverflowError,
107 "string is too long for a Python string");
108 return NULL;
109 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000110#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000111 if (size == 0 && (op = nullstring) != NULL) {
112#ifdef COUNT_ALLOCS
113 null_strings++;
114#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000115 Py_INCREF(op);
116 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 }
118 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
119#ifdef COUNT_ALLOCS
120 one_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000125#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
127 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133#ifdef CACHE_HASH
134 op->ob_shash = -1;
135#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000136#ifdef INTERN_STRINGS
137 op->ob_sinterned = NULL;
138#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000140#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000142 PyObject *t = (PyObject *)op;
143 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000144 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000148 PyObject *t = (PyObject *)op;
149 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000150 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000153 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000154#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
170 count = vargs;
171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
179 /* skip the 'l' in %ld, since it doesn't change the
180 width. although only %d is supported (see
181 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000182 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000183 if (*f == 'l' && *(f+1) == 'd')
184 ++f;
185
186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'i': case 'x':
194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
219 what's in the argument list) */
220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
233
234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
239 int i, longflag = 0;
240 /* parse the width.precision part (we're only
241 interested in the precision value, if any) */
242 n = 0;
243 while (isdigit(Py_CHARMASK(*f)))
244 n = (n*10) + *f++ - '0';
245 if (*f == '.') {
246 f++;
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 }
251 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
252 f++;
253 /* handle the long flag, but only for %ld. others
254 can be added when necessary. */
255 if (*f == 'l' && *(f+1) == 'd') {
256 longflag = 1;
257 ++f;
258 }
259
260 switch (*f) {
261 case 'c':
262 *s++ = va_arg(vargs, int);
263 break;
264 case 'd':
265 if (longflag)
266 sprintf(s, "%ld", va_arg(vargs, long));
267 else
268 sprintf(s, "%d", va_arg(vargs, int));
269 s += strlen(s);
270 break;
271 case 'i':
272 sprintf(s, "%i", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'x':
276 sprintf(s, "%x", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 's':
280 p = va_arg(vargs, char*);
281 i = strlen(p);
282 if (n > 0 && i > n)
283 i = n;
284 memcpy(s, p, i);
285 s += i;
286 break;
287 case 'p':
288 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000289 /* %p is ill-defined: ensure leading 0x. */
290 if (s[1] == 'X')
291 s[1] = 'x';
292 else if (s[1] != 'x') {
293 memmove(s+2, s, strlen(s)+1);
294 s[0] = '0';
295 s[1] = 'x';
296 }
Barry Warsawdadace02001-08-24 18:32:06 +0000297 s += strlen(s);
298 break;
299 case '%':
300 *s++ = '%';
301 break;
302 default:
303 strcpy(s, p);
304 s += strlen(s);
305 goto end;
306 }
307 } else
308 *s++ = *f;
309 }
310
311 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000312 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000313 return string;
314}
315
316PyObject *
317PyString_FromFormat(const char *format, ...)
318{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000319 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000320 va_list vargs;
321
322#ifdef HAVE_STDARG_PROTOTYPES
323 va_start(vargs, format);
324#else
325 va_start(vargs);
326#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000327 ret = PyString_FromFormatV(format, vargs);
328 va_end(vargs);
329 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000330}
331
332
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000333PyObject *PyString_Decode(const char *s,
334 int size,
335 const char *encoding,
336 const char *errors)
337{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000338 PyObject *v, *str;
339
340 str = PyString_FromStringAndSize(s, size);
341 if (str == NULL)
342 return NULL;
343 v = PyString_AsDecodedString(str, encoding, errors);
344 Py_DECREF(str);
345 return v;
346}
347
348PyObject *PyString_AsDecodedObject(PyObject *str,
349 const char *encoding,
350 const char *errors)
351{
352 PyObject *v;
353
354 if (!PyString_Check(str)) {
355 PyErr_BadArgument();
356 goto onError;
357 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000358
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000359 if (encoding == NULL) {
360#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000362#else
363 PyErr_SetString(PyExc_ValueError, "no encoding specified");
364 goto onError;
365#endif
366 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367
368 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 v = PyCodec_Decode(str, encoding, errors);
370 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372
373 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000374
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376 return NULL;
377}
378
379PyObject *PyString_AsDecodedString(PyObject *str,
380 const char *encoding,
381 const char *errors)
382{
383 PyObject *v;
384
385 v = PyString_AsDecodedObject(str, encoding, errors);
386 if (v == NULL)
387 goto onError;
388
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000389#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000390 /* Convert Unicode to a string using the default encoding */
391 if (PyUnicode_Check(v)) {
392 PyObject *temp = v;
393 v = PyUnicode_AsEncodedString(v, NULL, NULL);
394 Py_DECREF(temp);
395 if (v == NULL)
396 goto onError;
397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000398#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 if (!PyString_Check(v)) {
400 PyErr_Format(PyExc_TypeError,
401 "decoder did not return a string object (type=%.400s)",
402 v->ob_type->tp_name);
403 Py_DECREF(v);
404 goto onError;
405 }
406
407 return v;
408
409 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000410 return NULL;
411}
412
413PyObject *PyString_Encode(const char *s,
414 int size,
415 const char *encoding,
416 const char *errors)
417{
418 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000419
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000420 str = PyString_FromStringAndSize(s, size);
421 if (str == NULL)
422 return NULL;
423 v = PyString_AsEncodedString(str, encoding, errors);
424 Py_DECREF(str);
425 return v;
426}
427
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000428PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000429 const char *encoding,
430 const char *errors)
431{
432 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000433
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 if (!PyString_Check(str)) {
435 PyErr_BadArgument();
436 goto onError;
437 }
438
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000439 if (encoding == NULL) {
440#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000442#else
443 PyErr_SetString(PyExc_ValueError, "no encoding specified");
444 goto onError;
445#endif
446 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000447
448 /* Encode via the codec registry */
449 v = PyCodec_Encode(str, encoding, errors);
450 if (v == NULL)
451 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000452
453 return v;
454
455 onError:
456 return NULL;
457}
458
459PyObject *PyString_AsEncodedString(PyObject *str,
460 const char *encoding,
461 const char *errors)
462{
463 PyObject *v;
464
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000465 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000466 if (v == NULL)
467 goto onError;
468
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000469#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 /* Convert Unicode to a string using the default encoding */
471 if (PyUnicode_Check(v)) {
472 PyObject *temp = v;
473 v = PyUnicode_AsEncodedString(v, NULL, NULL);
474 Py_DECREF(temp);
475 if (v == NULL)
476 goto onError;
477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000478#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 if (!PyString_Check(v)) {
480 PyErr_Format(PyExc_TypeError,
481 "encoder did not return a string object (type=%.400s)",
482 v->ob_type->tp_name);
483 Py_DECREF(v);
484 goto onError;
485 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000488
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000489 onError:
490 return NULL;
491}
492
Guido van Rossum234f9421993-06-17 12:35:49 +0000493static void
Fred Drakeba096332000-07-09 07:04:36 +0000494string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000495{
Guido van Rossum9475a232001-10-05 20:51:39 +0000496 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000497}
498
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000499static int
500string_getsize(register PyObject *op)
501{
502 char *s;
503 int len;
504 if (PyString_AsStringAndSize(op, &s, &len))
505 return -1;
506 return len;
507}
508
509static /*const*/ char *
510string_getbuffer(register PyObject *op)
511{
512 char *s;
513 int len;
514 if (PyString_AsStringAndSize(op, &s, &len))
515 return NULL;
516 return s;
517}
518
Guido van Rossumd7047b31995-01-02 19:07:15 +0000519int
Fred Drakeba096332000-07-09 07:04:36 +0000520PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000522 if (!PyString_Check(op))
523 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000524 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000525}
526
527/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000528PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530 if (!PyString_Check(op))
531 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000532 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533}
534
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000535int
536PyString_AsStringAndSize(register PyObject *obj,
537 register char **s,
538 register int *len)
539{
540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
544
545 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000546#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000547 if (PyUnicode_Check(obj)) {
548 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
549 if (obj == NULL)
550 return -1;
551 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000552 else
553#endif
554 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000555 PyErr_Format(PyExc_TypeError,
556 "expected string or Unicode object, "
557 "%.200s found", obj->ob_type->tp_name);
558 return -1;
559 }
560 }
561
562 *s = PyString_AS_STRING(obj);
563 if (len != NULL)
564 *len = PyString_GET_SIZE(obj);
565 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
566 PyErr_SetString(PyExc_TypeError,
567 "expected string without null bytes");
568 return -1;
569 }
570 return 0;
571}
572
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573/* Methods */
574
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000575static int
Fred Drakeba096332000-07-09 07:04:36 +0000576string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000577{
578 int i;
579 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000580 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000581
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000582 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000583 if (! PyString_CheckExact(op)) {
584 int ret;
585 /* A str subclass may have its own __str__ method. */
586 op = (PyStringObject *) PyObject_Str((PyObject *)op);
587 if (op == NULL)
588 return -1;
589 ret = string_print(op, fp, flags);
590 Py_DECREF(op);
591 return ret;
592 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000593 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000595 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000596 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000597
Thomas Wouters7e474022000-07-16 12:04:32 +0000598 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000599 quote = '\'';
600 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
601 quote = '"';
602
603 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000604 for (i = 0; i < op->ob_size; i++) {
605 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000606 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000608 else if (c == '\t')
609 fprintf(fp, "\\t");
610 else if (c == '\n')
611 fprintf(fp, "\\n");
612 else if (c == '\r')
613 fprintf(fp, "\\r");
614 else if (c < ' ' || c >= 0x7f)
615 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000616 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000617 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000618 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000619 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000620 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621}
622
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000623static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000624string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000625{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000626 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
627 PyObject *v;
628 if (newsize > INT_MAX) {
629 PyErr_SetString(PyExc_OverflowError,
630 "string is too large to make repr");
631 }
632 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000633 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000634 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000635 }
636 else {
637 register int i;
638 register char c;
639 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000640 int quote;
641
Thomas Wouters7e474022000-07-16 12:04:32 +0000642 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000643 quote = '\'';
644 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
645 quote = '"';
646
Tim Peters9161c8b2001-12-03 01:55:38 +0000647 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000648 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000650 /* There's at least enough room for a hex escape
651 and a closing quote. */
652 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000654 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000656 else if (c == '\t')
657 *p++ = '\\', *p++ = 't';
658 else if (c == '\n')
659 *p++ = '\\', *p++ = 'n';
660 else if (c == '\r')
661 *p++ = '\\', *p++ = 'r';
662 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000663 /* For performance, we don't want to call
664 PyOS_snprintf here (extra layers of
665 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000666 sprintf(p, "\\x%02x", c & 0xff);
667 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000668 }
669 else
670 *p++ = c;
671 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000672 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000673 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000675 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000676 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000677 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000678 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000679}
680
Guido van Rossum189f1df2001-05-01 16:51:53 +0000681static PyObject *
682string_str(PyObject *s)
683{
Tim Petersc9933152001-10-16 20:18:24 +0000684 assert(PyString_Check(s));
685 if (PyString_CheckExact(s)) {
686 Py_INCREF(s);
687 return s;
688 }
689 else {
690 /* Subtype -- return genuine string with the same value. */
691 PyStringObject *t = (PyStringObject *) s;
692 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
693 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000694}
695
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696static int
Fred Drakeba096332000-07-09 07:04:36 +0000697string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698{
699 return a->ob_size;
700}
701
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000702static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000703string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704{
705 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 register PyStringObject *op;
707 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000708#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000709 if (PyUnicode_Check(bb))
710 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000711#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000712 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000713 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000714 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715 return NULL;
716 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000717#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000719 if ((a->ob_size == 0 || b->ob_size == 0) &&
720 PyString_CheckExact(a) && PyString_CheckExact(b)) {
721 if (a->ob_size == 0) {
722 Py_INCREF(bb);
723 return bb;
724 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 Py_INCREF(a);
726 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727 }
728 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000729 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000731 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000732 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000734 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000735#ifdef CACHE_HASH
736 op->ob_shash = -1;
737#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000738#ifdef INTERN_STRINGS
739 op->ob_sinterned = NULL;
740#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000741 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
742 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
743 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000744 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745#undef b
746}
747
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000748static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000749string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000750{
751 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000752 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000753 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000754 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000755 if (n < 0)
756 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000757 /* watch out for overflows: the size can overflow int,
758 * and the # of bytes needed can overflow size_t
759 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000761 if (n && size / n != a->ob_size) {
762 PyErr_SetString(PyExc_OverflowError,
763 "repeated string is too long");
764 return NULL;
765 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000766 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 Py_INCREF(a);
768 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769 }
Tim Peters8f422462000-09-09 06:13:41 +0000770 nbytes = size * sizeof(char);
771 if (nbytes / sizeof(char) != (size_t)size ||
772 nbytes + sizeof(PyStringObject) <= nbytes) {
773 PyErr_SetString(PyExc_OverflowError,
774 "repeated string is too long");
775 return NULL;
776 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000777 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000778 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000779 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000780 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000781 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000782#ifdef CACHE_HASH
783 op->ob_shash = -1;
784#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000785#ifdef INTERN_STRINGS
786 op->ob_sinterned = NULL;
787#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000788 for (i = 0; i < size; i += a->ob_size)
789 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
790 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000791 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792}
793
794/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
795
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000796static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000797string_slice(register PyStringObject *a, register int i, register int j)
798 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
800 if (i < 0)
801 i = 0;
802 if (j < 0)
803 j = 0; /* Avoid signed/unsigned bug in next line */
804 if (j > a->ob_size)
805 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000806 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
807 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 Py_INCREF(a);
809 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810 }
811 if (j < i)
812 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000813 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814}
815
Guido van Rossum9284a572000-03-07 15:53:43 +0000816static int
Fred Drakeba096332000-07-09 07:04:36 +0000817string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000818{
819 register char *s, *end;
820 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000821#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000822 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000823 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000824#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000825 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000826 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000827 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000828 return -1;
829 }
830 c = PyString_AsString(el)[0];
831 s = PyString_AsString(a);
832 end = s + PyString_Size(a);
833 while (s < end) {
834 if (c == *s++)
835 return 1;
836 }
837 return 0;
838}
839
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000840static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000841string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000842{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000843 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000844 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000846 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 return NULL;
848 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000849 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000850 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000851 if (v == NULL)
852 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000853 else {
854#ifdef COUNT_ALLOCS
855 one_strings++;
856#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000857 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000858 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000859 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000860}
861
Martin v. Löwiscd353062001-05-24 16:56:35 +0000862static PyObject*
863string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000865 int c;
866 int len_a, len_b;
867 int min_len;
868 PyObject *result;
869
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000870 /* Make sure both arguments are strings. */
871 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000872 result = Py_NotImplemented;
873 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000874 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000875 if (a == b) {
876 switch (op) {
877 case Py_EQ:case Py_LE:case Py_GE:
878 result = Py_True;
879 goto out;
880 case Py_NE:case Py_LT:case Py_GT:
881 result = Py_False;
882 goto out;
883 }
884 }
885 if (op == Py_EQ) {
886 /* Supporting Py_NE here as well does not save
887 much time, since Py_NE is rarely used. */
888 if (a->ob_size == b->ob_size
889 && (a->ob_sval[0] == b->ob_sval[0]
890 && memcmp(a->ob_sval, b->ob_sval,
891 a->ob_size) == 0)) {
892 result = Py_True;
893 } else {
894 result = Py_False;
895 }
896 goto out;
897 }
898 len_a = a->ob_size; len_b = b->ob_size;
899 min_len = (len_a < len_b) ? len_a : len_b;
900 if (min_len > 0) {
901 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
902 if (c==0)
903 c = memcmp(a->ob_sval, b->ob_sval, min_len);
904 }else
905 c = 0;
906 if (c == 0)
907 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
908 switch (op) {
909 case Py_LT: c = c < 0; break;
910 case Py_LE: c = c <= 0; break;
911 case Py_EQ: assert(0); break; /* unreachable */
912 case Py_NE: c = c != 0; break;
913 case Py_GT: c = c > 0; break;
914 case Py_GE: c = c >= 0; break;
915 default:
916 result = Py_NotImplemented;
917 goto out;
918 }
919 result = c ? Py_True : Py_False;
920 out:
921 Py_INCREF(result);
922 return result;
923}
924
925int
926_PyString_Eq(PyObject *o1, PyObject *o2)
927{
928 PyStringObject *a, *b;
929 a = (PyStringObject*)o1;
930 b = (PyStringObject*)o2;
931 return a->ob_size == b->ob_size
932 && *a->ob_sval == *b->ob_sval
933 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934}
935
Guido van Rossum9bfef441993-03-29 10:43:31 +0000936static long
Fred Drakeba096332000-07-09 07:04:36 +0000937string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000938{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000939 register int len;
940 register unsigned char *p;
941 register long x;
942
943#ifdef CACHE_HASH
944 if (a->ob_shash != -1)
945 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000946#ifdef INTERN_STRINGS
947 if (a->ob_sinterned != NULL)
948 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000949 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000950#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000951#endif
952 len = a->ob_size;
953 p = (unsigned char *) a->ob_sval;
954 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000955 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000956 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000957 x ^= a->ob_size;
958 if (x == -1)
959 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000960#ifdef CACHE_HASH
961 a->ob_shash = x;
962#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000963 return x;
964}
965
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000966static int
Fred Drakeba096332000-07-09 07:04:36 +0000967string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000968{
969 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000970 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000971 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000972 return -1;
973 }
974 *ptr = (void *)self->ob_sval;
975 return self->ob_size;
976}
977
978static int
Fred Drakeba096332000-07-09 07:04:36 +0000979string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000980{
Guido van Rossum045e6881997-09-08 18:30:11 +0000981 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000982 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000983 return -1;
984}
985
986static int
Fred Drakeba096332000-07-09 07:04:36 +0000987string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000988{
989 if ( lenp )
990 *lenp = self->ob_size;
991 return 1;
992}
993
Guido van Rossum1db70701998-10-08 02:18:52 +0000994static int
Fred Drakeba096332000-07-09 07:04:36 +0000995string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000996{
997 if ( index != 0 ) {
998 PyErr_SetString(PyExc_SystemError,
999 "accessing non-existent string segment");
1000 return -1;
1001 }
1002 *ptr = self->ob_sval;
1003 return self->ob_size;
1004}
1005
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001007 (inquiry)string_length, /*sq_length*/
1008 (binaryfunc)string_concat, /*sq_concat*/
1009 (intargfunc)string_repeat, /*sq_repeat*/
1010 (intargfunc)string_item, /*sq_item*/
1011 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001012 0, /*sq_ass_item*/
1013 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001014 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001015};
1016
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001017static PyBufferProcs string_as_buffer = {
1018 (getreadbufferproc)string_buffer_getreadbuf,
1019 (getwritebufferproc)string_buffer_getwritebuf,
1020 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001021 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001022};
1023
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001024
1025
/* Selector values for do_strip(): which end(s) of the string to trim. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2
1029
1030
1031static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001032split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001034 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001035 PyObject* item;
1036 PyObject *list = PyList_New(0);
1037
1038 if (list == NULL)
1039 return NULL;
1040
Guido van Rossum4c08d552000-03-10 22:55:18 +00001041 for (i = j = 0; i < len; ) {
1042 while (i < len && isspace(Py_CHARMASK(s[i])))
1043 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001044 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001045 while (i < len && !isspace(Py_CHARMASK(s[i])))
1046 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001047 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001048 if (maxsplit-- <= 0)
1049 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1051 if (item == NULL)
1052 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001053 err = PyList_Append(list, item);
1054 Py_DECREF(item);
1055 if (err < 0)
1056 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001057 while (i < len && isspace(Py_CHARMASK(s[i])))
1058 i++;
1059 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001060 }
1061 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001062 if (j < len) {
1063 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1064 if (item == NULL)
1065 goto finally;
1066 err = PyList_Append(list, item);
1067 Py_DECREF(item);
1068 if (err < 0)
1069 goto finally;
1070 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001071 return list;
1072 finally:
1073 Py_DECREF(list);
1074 return NULL;
1075}
1076
1077
1078static char split__doc__[] =
1079"S.split([sep [,maxsplit]]) -> list of strings\n\
1080\n\
1081Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001082delimiter string. If maxsplit is given, at most maxsplit\n\
1083splits are done. If sep is not specified, any whitespace string\n\
1084is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001085
1086static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001087string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088{
1089 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001090 int maxsplit = -1;
1091 const char *s = PyString_AS_STRING(self), *sub;
1092 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093
Guido van Rossum4c08d552000-03-10 22:55:18 +00001094 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001095 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001096 if (maxsplit < 0)
1097 maxsplit = INT_MAX;
1098 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001099 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001100 if (PyString_Check(subobj)) {
1101 sub = PyString_AS_STRING(subobj);
1102 n = PyString_GET_SIZE(subobj);
1103 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001104#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001105 else if (PyUnicode_Check(subobj))
1106 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001107#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001108 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1109 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001110 if (n == 0) {
1111 PyErr_SetString(PyExc_ValueError, "empty separator");
1112 return NULL;
1113 }
1114
1115 list = PyList_New(0);
1116 if (list == NULL)
1117 return NULL;
1118
1119 i = j = 0;
1120 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001121 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001122 if (maxsplit-- <= 0)
1123 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001124 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1125 if (item == NULL)
1126 goto fail;
1127 err = PyList_Append(list, item);
1128 Py_DECREF(item);
1129 if (err < 0)
1130 goto fail;
1131 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132 }
1133 else
1134 i++;
1135 }
1136 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1137 if (item == NULL)
1138 goto fail;
1139 err = PyList_Append(list, item);
1140 Py_DECREF(item);
1141 if (err < 0)
1142 goto fail;
1143
1144 return list;
1145
1146 fail:
1147 Py_DECREF(list);
1148 return NULL;
1149}
1150
1151
1152static char join__doc__[] =
1153"S.join(sequence) -> string\n\
1154\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001155Return a string which is the concatenation of the strings in the\n\
1156sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001157
1158static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001159string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001160{
1161 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001162 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001164 char *p;
1165 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001166 size_t sz = 0;
1167 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001168 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001169
Tim Peters19fe14e2001-01-19 03:03:47 +00001170 seq = PySequence_Fast(orig, "");
1171 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001172 if (PyErr_ExceptionMatches(PyExc_TypeError))
1173 PyErr_Format(PyExc_TypeError,
1174 "sequence expected, %.80s found",
1175 orig->ob_type->tp_name);
1176 return NULL;
1177 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001178
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001179 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001180 if (seqlen == 0) {
1181 Py_DECREF(seq);
1182 return PyString_FromString("");
1183 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001184 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001185 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001186 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1187 PyErr_Format(PyExc_TypeError,
1188 "sequence item 0: expected string,"
1189 " %.80s found",
1190 item->ob_type->tp_name);
1191 Py_DECREF(seq);
1192 return NULL;
1193 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001194 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001195 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001196 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001197 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001198
Tim Peters19fe14e2001-01-19 03:03:47 +00001199 /* There are at least two things to join. Do a pre-pass to figure out
1200 * the total amount of space we'll need (sz), see whether any argument
1201 * is absurd, and defer to the Unicode join if appropriate.
1202 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001203 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001204 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001205 item = PySequence_Fast_GET_ITEM(seq, i);
1206 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001207#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001208 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001209 /* Defer to Unicode join.
1210 * CAUTION: There's no gurantee that the
1211 * original sequence can be iterated over
1212 * again, so we must pass seq here.
1213 */
1214 PyObject *result;
1215 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001216 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001217 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001218 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001219#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001220 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001221 "sequence item %i: expected string,"
1222 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001223 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001224 Py_DECREF(seq);
1225 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001226 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001227 sz += PyString_GET_SIZE(item);
1228 if (i != 0)
1229 sz += seplen;
1230 if (sz < old_sz || sz > INT_MAX) {
1231 PyErr_SetString(PyExc_OverflowError,
1232 "join() is too long for a Python string");
1233 Py_DECREF(seq);
1234 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001235 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001236 }
1237
1238 /* Allocate result space. */
1239 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1240 if (res == NULL) {
1241 Py_DECREF(seq);
1242 return NULL;
1243 }
1244
1245 /* Catenate everything. */
1246 p = PyString_AS_STRING(res);
1247 for (i = 0; i < seqlen; ++i) {
1248 size_t n;
1249 item = PySequence_Fast_GET_ITEM(seq, i);
1250 n = PyString_GET_SIZE(item);
1251 memcpy(p, PyString_AS_STRING(item), n);
1252 p += n;
1253 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001254 memcpy(p, sep, seplen);
1255 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001256 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001257 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001258
Jeremy Hylton49048292000-07-11 03:28:17 +00001259 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261}
1262
Tim Peters52e155e2001-06-16 05:42:57 +00001263PyObject *
1264_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001265{
Tim Petersa7259592001-06-16 05:11:17 +00001266 assert(sep != NULL && PyString_Check(sep));
1267 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001268 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001269}
1270
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001271static long
Fred Drakeba096332000-07-09 07:04:36 +00001272string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001274 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001275 int len = PyString_GET_SIZE(self);
1276 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001277 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001279 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001280 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001281 return -2;
1282 if (PyString_Check(subobj)) {
1283 sub = PyString_AS_STRING(subobj);
1284 n = PyString_GET_SIZE(subobj);
1285 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001286#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001287 else if (PyUnicode_Check(subobj))
1288 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001289#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001291 return -2;
1292
1293 if (last > len)
1294 last = len;
1295 if (last < 0)
1296 last += len;
1297 if (last < 0)
1298 last = 0;
1299 if (i < 0)
1300 i += len;
1301 if (i < 0)
1302 i = 0;
1303
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 if (dir > 0) {
1305 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 last -= n;
1308 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001309 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001310 return (long)i;
1311 }
1312 else {
1313 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001314
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 if (n == 0 && i <= last)
1316 return (long)last;
1317 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001318 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 return (long)j;
1320 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001321
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001322 return -1;
1323}
1324
1325
1326static char find__doc__[] =
1327"S.find(sub [,start [,end]]) -> int\n\
1328\n\
1329Return the lowest index in S where substring sub is found,\n\
1330such that sub is contained within s[start,end]. Optional\n\
1331arguments start and end are interpreted as in slice notation.\n\
1332\n\
1333Return -1 on failure.";
1334
1335static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001336string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001338 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339 if (result == -2)
1340 return NULL;
1341 return PyInt_FromLong(result);
1342}
1343
1344
1345static char index__doc__[] =
1346"S.index(sub [,start [,end]]) -> int\n\
1347\n\
1348Like S.find() but raise ValueError when the substring is not found.";
1349
1350static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001351string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 if (result == -2)
1355 return NULL;
1356 if (result == -1) {
1357 PyErr_SetString(PyExc_ValueError,
1358 "substring not found in string.index");
1359 return NULL;
1360 }
1361 return PyInt_FromLong(result);
1362}
1363
1364
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365static char rfind__doc__[] =
1366"S.rfind(sub [,start [,end]]) -> int\n\
1367\n\
1368Return the highest index in S where substring sub is found,\n\
1369such that sub is contained within s[start,end]. Optional\n\
1370arguments start and end are interpreted as in slice notation.\n\
1371\n\
1372Return -1 on failure.";
1373
1374static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001375string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001377 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378 if (result == -2)
1379 return NULL;
1380 return PyInt_FromLong(result);
1381}
1382
1383
1384static char rindex__doc__[] =
1385"S.rindex(sub [,start [,end]]) -> int\n\
1386\n\
1387Like S.rfind() but raise ValueError when the substring is not found.";
1388
1389static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001390string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001392 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393 if (result == -2)
1394 return NULL;
1395 if (result == -1) {
1396 PyErr_SetString(PyExc_ValueError,
1397 "substring not found in string.rindex");
1398 return NULL;
1399 }
1400 return PyInt_FromLong(result);
1401}
1402
1403
1404static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001405do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406{
1407 char *s = PyString_AS_STRING(self);
1408 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410 i = 0;
1411 if (striptype != RIGHTSTRIP) {
1412 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1413 i++;
1414 }
1415 }
1416
1417 j = len;
1418 if (striptype != LEFTSTRIP) {
1419 do {
1420 j--;
1421 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1422 j++;
1423 }
1424
Tim Peters8fa5dd02001-09-12 02:18:30 +00001425 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426 Py_INCREF(self);
1427 return (PyObject*)self;
1428 }
1429 else
1430 return PyString_FromStringAndSize(s+i, j-i);
1431}
1432
1433
1434static char strip__doc__[] =
1435"S.strip() -> string\n\
1436\n\
1437Return a copy of the string S with leading and trailing\n\
1438whitespace removed.";
1439
1440static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001441string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001442{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001443 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444}
1445
1446
1447static char lstrip__doc__[] =
1448"S.lstrip() -> string\n\
1449\n\
1450Return a copy of the string S with leading whitespace removed.";
1451
1452static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001453string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001455 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456}
1457
1458
1459static char rstrip__doc__[] =
1460"S.rstrip() -> string\n\
1461\n\
1462Return a copy of the string S with trailing whitespace removed.";
1463
1464static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001465string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001467 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468}
1469
1470
1471static char lower__doc__[] =
1472"S.lower() -> string\n\
1473\n\
1474Return a copy of the string S converted to lowercase.";
1475
1476static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001477string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478{
1479 char *s = PyString_AS_STRING(self), *s_new;
1480 int i, n = PyString_GET_SIZE(self);
1481 PyObject *new;
1482
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001483 new = PyString_FromStringAndSize(NULL, n);
1484 if (new == NULL)
1485 return NULL;
1486 s_new = PyString_AsString(new);
1487 for (i = 0; i < n; i++) {
1488 int c = Py_CHARMASK(*s++);
1489 if (isupper(c)) {
1490 *s_new = tolower(c);
1491 } else
1492 *s_new = c;
1493 s_new++;
1494 }
1495 return new;
1496}
1497
1498
1499static char upper__doc__[] =
1500"S.upper() -> string\n\
1501\n\
1502Return a copy of the string S converted to uppercase.";
1503
1504static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001505string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506{
1507 char *s = PyString_AS_STRING(self), *s_new;
1508 int i, n = PyString_GET_SIZE(self);
1509 PyObject *new;
1510
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 new = PyString_FromStringAndSize(NULL, n);
1512 if (new == NULL)
1513 return NULL;
1514 s_new = PyString_AsString(new);
1515 for (i = 0; i < n; i++) {
1516 int c = Py_CHARMASK(*s++);
1517 if (islower(c)) {
1518 *s_new = toupper(c);
1519 } else
1520 *s_new = c;
1521 s_new++;
1522 }
1523 return new;
1524}
1525
1526
Guido van Rossum4c08d552000-03-10 22:55:18 +00001527static char title__doc__[] =
1528"S.title() -> string\n\
1529\n\
1530Return a titlecased version of S, i.e. words start with uppercase\n\
1531characters, all remaining cased characters have lowercase.";
1532
1533static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001534string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535{
1536 char *s = PyString_AS_STRING(self), *s_new;
1537 int i, n = PyString_GET_SIZE(self);
1538 int previous_is_cased = 0;
1539 PyObject *new;
1540
Guido van Rossum4c08d552000-03-10 22:55:18 +00001541 new = PyString_FromStringAndSize(NULL, n);
1542 if (new == NULL)
1543 return NULL;
1544 s_new = PyString_AsString(new);
1545 for (i = 0; i < n; i++) {
1546 int c = Py_CHARMASK(*s++);
1547 if (islower(c)) {
1548 if (!previous_is_cased)
1549 c = toupper(c);
1550 previous_is_cased = 1;
1551 } else if (isupper(c)) {
1552 if (previous_is_cased)
1553 c = tolower(c);
1554 previous_is_cased = 1;
1555 } else
1556 previous_is_cased = 0;
1557 *s_new++ = c;
1558 }
1559 return new;
1560}
1561
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001562static char capitalize__doc__[] =
1563"S.capitalize() -> string\n\
1564\n\
1565Return a copy of the string S with only its first character\n\
1566capitalized.";
1567
1568static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001569string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570{
1571 char *s = PyString_AS_STRING(self), *s_new;
1572 int i, n = PyString_GET_SIZE(self);
1573 PyObject *new;
1574
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 new = PyString_FromStringAndSize(NULL, n);
1576 if (new == NULL)
1577 return NULL;
1578 s_new = PyString_AsString(new);
1579 if (0 < n) {
1580 int c = Py_CHARMASK(*s++);
1581 if (islower(c))
1582 *s_new = toupper(c);
1583 else
1584 *s_new = c;
1585 s_new++;
1586 }
1587 for (i = 1; i < n; i++) {
1588 int c = Py_CHARMASK(*s++);
1589 if (isupper(c))
1590 *s_new = tolower(c);
1591 else
1592 *s_new = c;
1593 s_new++;
1594 }
1595 return new;
1596}
1597
1598
1599static char count__doc__[] =
1600"S.count(sub[, start[, end]]) -> int\n\
1601\n\
1602Return the number of occurrences of substring sub in string\n\
1603S[start:end]. Optional arguments start and end are\n\
1604interpreted as in slice notation.";
1605
1606static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001607string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001609 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610 int len = PyString_GET_SIZE(self), n;
1611 int i = 0, last = INT_MAX;
1612 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001613 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614
Guido van Rossumc6821402000-05-08 14:08:05 +00001615 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1616 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001618
Guido van Rossum4c08d552000-03-10 22:55:18 +00001619 if (PyString_Check(subobj)) {
1620 sub = PyString_AS_STRING(subobj);
1621 n = PyString_GET_SIZE(subobj);
1622 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001623#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001624 else if (PyUnicode_Check(subobj)) {
1625 int count;
1626 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1627 if (count == -1)
1628 return NULL;
1629 else
1630 return PyInt_FromLong((long) count);
1631 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001632#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001633 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1634 return NULL;
1635
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636 if (last > len)
1637 last = len;
1638 if (last < 0)
1639 last += len;
1640 if (last < 0)
1641 last = 0;
1642 if (i < 0)
1643 i += len;
1644 if (i < 0)
1645 i = 0;
1646 m = last + 1 - n;
1647 if (n == 0)
1648 return PyInt_FromLong((long) (m-i));
1649
1650 r = 0;
1651 while (i < m) {
1652 if (!memcmp(s+i, sub, n)) {
1653 r++;
1654 i += n;
1655 } else {
1656 i++;
1657 }
1658 }
1659 return PyInt_FromLong((long) r);
1660}
1661
1662
1663static char swapcase__doc__[] =
1664"S.swapcase() -> string\n\
1665\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001666Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667converted to lowercase and vice versa.";
1668
1669static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001670string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671{
1672 char *s = PyString_AS_STRING(self), *s_new;
1673 int i, n = PyString_GET_SIZE(self);
1674 PyObject *new;
1675
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676 new = PyString_FromStringAndSize(NULL, n);
1677 if (new == NULL)
1678 return NULL;
1679 s_new = PyString_AsString(new);
1680 for (i = 0; i < n; i++) {
1681 int c = Py_CHARMASK(*s++);
1682 if (islower(c)) {
1683 *s_new = toupper(c);
1684 }
1685 else if (isupper(c)) {
1686 *s_new = tolower(c);
1687 }
1688 else
1689 *s_new = c;
1690 s_new++;
1691 }
1692 return new;
1693}
1694
1695
1696static char translate__doc__[] =
1697"S.translate(table [,deletechars]) -> string\n\
1698\n\
1699Return a copy of the string S, where all characters occurring\n\
1700in the optional argument deletechars are removed, and the\n\
1701remaining characters have been mapped through the given\n\
1702translation table, which must be a string of length 256.";
1703
1704static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001705string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001707 register char *input, *output;
1708 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709 register int i, c, changed = 0;
1710 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001711 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001712 int inlen, tablen, dellen = 0;
1713 PyObject *result;
1714 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001715 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716
Guido van Rossum4c08d552000-03-10 22:55:18 +00001717 if (!PyArg_ParseTuple(args, "O|O:translate",
1718 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001720
1721 if (PyString_Check(tableobj)) {
1722 table1 = PyString_AS_STRING(tableobj);
1723 tablen = PyString_GET_SIZE(tableobj);
1724 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001725#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001726 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001727 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001728 parameter; instead a mapping to None will cause characters
1729 to be deleted. */
1730 if (delobj != NULL) {
1731 PyErr_SetString(PyExc_TypeError,
1732 "deletions are implemented differently for unicode");
1733 return NULL;
1734 }
1735 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1736 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001737#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001738 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001740
1741 if (delobj != NULL) {
1742 if (PyString_Check(delobj)) {
1743 del_table = PyString_AS_STRING(delobj);
1744 dellen = PyString_GET_SIZE(delobj);
1745 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001746#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001747 else if (PyUnicode_Check(delobj)) {
1748 PyErr_SetString(PyExc_TypeError,
1749 "deletions are implemented differently for unicode");
1750 return NULL;
1751 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001752#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1754 return NULL;
1755
1756 if (tablen != 256) {
1757 PyErr_SetString(PyExc_ValueError,
1758 "translation table must be 256 characters long");
1759 return NULL;
1760 }
1761 }
1762 else {
1763 del_table = NULL;
1764 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 }
1766
1767 table = table1;
1768 inlen = PyString_Size(input_obj);
1769 result = PyString_FromStringAndSize((char *)NULL, inlen);
1770 if (result == NULL)
1771 return NULL;
1772 output_start = output = PyString_AsString(result);
1773 input = PyString_AsString(input_obj);
1774
1775 if (dellen == 0) {
1776 /* If no deletions are required, use faster code */
1777 for (i = inlen; --i >= 0; ) {
1778 c = Py_CHARMASK(*input++);
1779 if (Py_CHARMASK((*output++ = table[c])) != c)
1780 changed = 1;
1781 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001782 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783 return result;
1784 Py_DECREF(result);
1785 Py_INCREF(input_obj);
1786 return input_obj;
1787 }
1788
1789 for (i = 0; i < 256; i++)
1790 trans_table[i] = Py_CHARMASK(table[i]);
1791
1792 for (i = 0; i < dellen; i++)
1793 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1794
1795 for (i = inlen; --i >= 0; ) {
1796 c = Py_CHARMASK(*input++);
1797 if (trans_table[c] != -1)
1798 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1799 continue;
1800 changed = 1;
1801 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001802 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 Py_DECREF(result);
1804 Py_INCREF(input_obj);
1805 return input_obj;
1806 }
1807 /* Fix the size of the resulting string */
1808 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1809 return NULL;
1810 return result;
1811}
1812
1813
1814/* What follows is used for implementing replace(). Perry Stoll. */
1815
/*
  mymemfind

  memmem-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index of that occurrence within MEM, or
  -1 when there is none.  A pattern longer than MEM never matches, so
  -1 is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start inside the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1841
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  the left:
        mem=1111  and pat=11 gives 2;
        mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        const int where = mymemfind(mem, len, pat, pat_len);
        int step;

        if (where == -1)
            break;
        /* Resume the search just past the match that was found. */
        step = where + pat_len;
        mem += step;
        len -= step;
    }
    return hits;
}
1865
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here with PyMem_MALLOC and returned; the caller must release it.

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Refuse results whose length would overflow an int; otherwise
       new_len could wrap and the buffer below would be too small for
       the copies.  Only a growing replacement can overflow: when
       sub_len <= pat_len the result is no longer than the input. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1950
1951
1952static char replace__doc__[] =
1953"S.replace (old, new[, maxsplit]) -> string\n\
1954\n\
1955Return a copy of string S with all occurrences of substring\n\
1956old replaced by new. If the optional argument maxsplit is\n\
1957given, only the first maxsplit occurrences are replaced.";
1958
1959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001960string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962 const char *str = PyString_AS_STRING(self), *sub, *repl;
1963 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001964 const int len = PyString_GET_SIZE(self);
1965 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001968 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969
Guido van Rossum4c08d552000-03-10 22:55:18 +00001970 if (!PyArg_ParseTuple(args, "OO|i:replace",
1971 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001973
1974 if (PyString_Check(subobj)) {
1975 sub = PyString_AS_STRING(subobj);
1976 sub_len = PyString_GET_SIZE(subobj);
1977 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001978#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001979 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001980 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001981 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001982#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001983 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1984 return NULL;
1985
1986 if (PyString_Check(replobj)) {
1987 repl = PyString_AS_STRING(replobj);
1988 repl_len = PyString_GET_SIZE(replobj);
1989 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001990#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001991 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001992 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001993 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001994#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001995 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1996 return NULL;
1997
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001998 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001999 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000 return NULL;
2001 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002002 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003 if (new_s == NULL) {
2004 PyErr_NoMemory();
2005 return NULL;
2006 }
2007 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002008 if (PyString_CheckExact(self)) {
2009 /* we're returning another reference to self */
2010 new = (PyObject*)self;
2011 Py_INCREF(new);
2012 }
2013 else {
2014 new = PyString_FromStringAndSize(str, len);
2015 if (new == NULL)
2016 return NULL;
2017 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018 }
2019 else {
2020 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002021 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022 }
2023 return new;
2024}
2025
2026
2027static char startswith__doc__[] =
2028"S.startswith(prefix[, start[, end]]) -> int\n\
2029\n\
2030Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2031optional start, test S beginning at that position. With optional end, stop\n\
2032comparing S at that position.";
2033
2034static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002035string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 int plen;
2041 int start = 0;
2042 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044
Guido van Rossumc6821402000-05-08 14:08:05 +00002045 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2046 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002047 return NULL;
2048 if (PyString_Check(subobj)) {
2049 prefix = PyString_AS_STRING(subobj);
2050 plen = PyString_GET_SIZE(subobj);
2051 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002052#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002053 else if (PyUnicode_Check(subobj)) {
2054 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002055 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002056 subobj, start, end, -1);
2057 if (rc == -1)
2058 return NULL;
2059 else
2060 return PyInt_FromLong((long) rc);
2061 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002062#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
2065
2066 /* adopt Java semantics for index out of range. it is legal for
2067 * offset to be == plen, but this only returns true if prefix is
2068 * the empty string.
2069 */
2070 if (start < 0 || start+plen > len)
2071 return PyInt_FromLong(0);
2072
2073 if (!memcmp(str+start, prefix, plen)) {
2074 /* did the match end after the specified end? */
2075 if (end < 0)
2076 return PyInt_FromLong(1);
2077 else if (end - start < plen)
2078 return PyInt_FromLong(0);
2079 else
2080 return PyInt_FromLong(1);
2081 }
2082 else return PyInt_FromLong(0);
2083}
2084
2085
2086static char endswith__doc__[] =
2087"S.endswith(suffix[, start[, end]]) -> int\n\
2088\n\
2089Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2090optional start, test S beginning at that position. With optional end, stop\n\
2091comparing S at that position.";
2092
2093static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002094string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 const char* suffix;
2099 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100 int start = 0;
2101 int end = -1;
2102 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104
Guido van Rossumc6821402000-05-08 14:08:05 +00002105 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2106 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 return NULL;
2108 if (PyString_Check(subobj)) {
2109 suffix = PyString_AS_STRING(subobj);
2110 slen = PyString_GET_SIZE(subobj);
2111 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002112#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002113 else if (PyUnicode_Check(subobj)) {
2114 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002115 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002116 subobj, start, end, +1);
2117 if (rc == -1)
2118 return NULL;
2119 else
2120 return PyInt_FromLong((long) rc);
2121 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002122#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002123 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124 return NULL;
2125
Guido van Rossum4c08d552000-03-10 22:55:18 +00002126 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 return PyInt_FromLong(0);
2128
2129 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133 return PyInt_FromLong(1);
2134 else return PyInt_FromLong(0);
2135}
2136
2137
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002138static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002139"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002140\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002141Encodes S using the codec registered for encoding. encoding defaults\n\
2142to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002143handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2144a ValueError. Other possible values are 'ignore' and 'replace'.";
2145
2146static PyObject *
2147string_encode(PyStringObject *self, PyObject *args)
2148{
2149 char *encoding = NULL;
2150 char *errors = NULL;
2151 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2152 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002153 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2154}
2155
2156
2157static char decode__doc__[] =
2158"S.decode([encoding[,errors]]) -> object\n\
2159\n\
2160Decodes S using the codec registered for encoding. encoding defaults\n\
2161to the default encoding. errors may be given to set a different error\n\
2162handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2163a ValueError. Other possible values are 'ignore' and 'replace'.";
2164
2165static PyObject *
2166string_decode(PyStringObject *self, PyObject *args)
2167{
2168 char *encoding = NULL;
2169 char *errors = NULL;
2170 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2171 return NULL;
2172 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002173}
2174
2175
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176static char expandtabs__doc__[] =
2177"S.expandtabs([tabsize]) -> string\n\
2178\n\
2179Return a copy of S where all tab characters are expanded using spaces.\n\
2180If tabsize is not given, a tab size of 8 characters is assumed.";
2181
2182static PyObject*
2183string_expandtabs(PyStringObject *self, PyObject *args)
2184{
2185 const char *e, *p;
2186 char *q;
2187 int i, j;
2188 PyObject *u;
2189 int tabsize = 8;
2190
2191 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2192 return NULL;
2193
Thomas Wouters7e474022000-07-16 12:04:32 +00002194 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195 i = j = 0;
2196 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2197 for (p = PyString_AS_STRING(self); p < e; p++)
2198 if (*p == '\t') {
2199 if (tabsize > 0)
2200 j += tabsize - (j % tabsize);
2201 }
2202 else {
2203 j++;
2204 if (*p == '\n' || *p == '\r') {
2205 i += j;
2206 j = 0;
2207 }
2208 }
2209
2210 /* Second pass: create output string and fill it */
2211 u = PyString_FromStringAndSize(NULL, i + j);
2212 if (!u)
2213 return NULL;
2214
2215 j = 0;
2216 q = PyString_AS_STRING(u);
2217
2218 for (p = PyString_AS_STRING(self); p < e; p++)
2219 if (*p == '\t') {
2220 if (tabsize > 0) {
2221 i = tabsize - (j % tabsize);
2222 j += i;
2223 while (i--)
2224 *q++ = ' ';
2225 }
2226 }
2227 else {
2228 j++;
2229 *q++ = *p;
2230 if (*p == '\n' || *p == '\r')
2231 j = 0;
2232 }
2233
2234 return u;
2235}
2236
Tim Peters8fa5dd02001-09-12 02:18:30 +00002237static PyObject *
2238pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239{
2240 PyObject *u;
2241
2242 if (left < 0)
2243 left = 0;
2244 if (right < 0)
2245 right = 0;
2246
Tim Peters8fa5dd02001-09-12 02:18:30 +00002247 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248 Py_INCREF(self);
2249 return (PyObject *)self;
2250 }
2251
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002252 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 left + PyString_GET_SIZE(self) + right);
2254 if (u) {
2255 if (left)
2256 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002257 memcpy(PyString_AS_STRING(u) + left,
2258 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 PyString_GET_SIZE(self));
2260 if (right)
2261 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2262 fill, right);
2263 }
2264
2265 return u;
2266}
2267
2268static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002269"S.ljust(width) -> string\n"
2270"\n"
2271"Return S left justified in a string of length width. Padding is\n"
2272"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273
2274static PyObject *
2275string_ljust(PyStringObject *self, PyObject *args)
2276{
2277 int width;
2278 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2279 return NULL;
2280
Tim Peters8fa5dd02001-09-12 02:18:30 +00002281 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282 Py_INCREF(self);
2283 return (PyObject*) self;
2284 }
2285
2286 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2287}
2288
2289
2290static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002291"S.rjust(width) -> string\n"
2292"\n"
2293"Return S right justified in a string of length width. Padding is\n"
2294"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295
2296static PyObject *
2297string_rjust(PyStringObject *self, PyObject *args)
2298{
2299 int width;
2300 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2301 return NULL;
2302
Tim Peters8fa5dd02001-09-12 02:18:30 +00002303 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 Py_INCREF(self);
2305 return (PyObject*) self;
2306 }
2307
2308 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2309}
2310
2311
2312static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002313"S.center(width) -> string\n"
2314"\n"
2315"Return S centered in a string of length width. Padding is done\n"
2316"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317
2318static PyObject *
2319string_center(PyStringObject *self, PyObject *args)
2320{
2321 int marg, left;
2322 int width;
2323
2324 if (!PyArg_ParseTuple(args, "i:center", &width))
2325 return NULL;
2326
Tim Peters8fa5dd02001-09-12 02:18:30 +00002327 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 Py_INCREF(self);
2329 return (PyObject*) self;
2330 }
2331
2332 marg = width - PyString_GET_SIZE(self);
2333 left = marg / 2 + (marg & width & 1);
2334
2335 return pad(self, left, marg - left, ' ');
2336}
2337
Guido van Rossum4c08d552000-03-10 22:55:18 +00002338static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002339"S.isspace() -> int\n"
2340"\n"
2341"Return 1 if there are only whitespace characters in S,\n"
2342"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002343
2344static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002345string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346{
Fred Drakeba096332000-07-09 07:04:36 +00002347 register const unsigned char *p
2348 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002349 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 /* Shortcut for single character strings */
2352 if (PyString_GET_SIZE(self) == 1 &&
2353 isspace(*p))
2354 return PyInt_FromLong(1);
2355
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002356 /* Special case for empty strings */
2357 if (PyString_GET_SIZE(self) == 0)
2358 return PyInt_FromLong(0);
2359
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360 e = p + PyString_GET_SIZE(self);
2361 for (; p < e; p++) {
2362 if (!isspace(*p))
2363 return PyInt_FromLong(0);
2364 }
2365 return PyInt_FromLong(1);
2366}
2367
2368
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002369static char isalpha__doc__[] =
2370"S.isalpha() -> int\n\
2371\n\
2372Return 1 if all characters in S are alphabetic\n\
2373and there is at least one character in S, 0 otherwise.";
2374
2375static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002376string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002377{
Fred Drakeba096332000-07-09 07:04:36 +00002378 register const unsigned char *p
2379 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002380 register const unsigned char *e;
2381
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002382 /* Shortcut for single character strings */
2383 if (PyString_GET_SIZE(self) == 1 &&
2384 isalpha(*p))
2385 return PyInt_FromLong(1);
2386
2387 /* Special case for empty strings */
2388 if (PyString_GET_SIZE(self) == 0)
2389 return PyInt_FromLong(0);
2390
2391 e = p + PyString_GET_SIZE(self);
2392 for (; p < e; p++) {
2393 if (!isalpha(*p))
2394 return PyInt_FromLong(0);
2395 }
2396 return PyInt_FromLong(1);
2397}
2398
2399
2400static char isalnum__doc__[] =
2401"S.isalnum() -> int\n\
2402\n\
2403Return 1 if all characters in S are alphanumeric\n\
2404and there is at least one character in S, 0 otherwise.";
2405
2406static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002407string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002408{
Fred Drakeba096332000-07-09 07:04:36 +00002409 register const unsigned char *p
2410 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002411 register const unsigned char *e;
2412
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002413 /* Shortcut for single character strings */
2414 if (PyString_GET_SIZE(self) == 1 &&
2415 isalnum(*p))
2416 return PyInt_FromLong(1);
2417
2418 /* Special case for empty strings */
2419 if (PyString_GET_SIZE(self) == 0)
2420 return PyInt_FromLong(0);
2421
2422 e = p + PyString_GET_SIZE(self);
2423 for (; p < e; p++) {
2424 if (!isalnum(*p))
2425 return PyInt_FromLong(0);
2426 }
2427 return PyInt_FromLong(1);
2428}
2429
2430
Guido van Rossum4c08d552000-03-10 22:55:18 +00002431static char isdigit__doc__[] =
2432"S.isdigit() -> int\n\
2433\n\
2434Return 1 if there are only digit characters in S,\n\
24350 otherwise.";
2436
2437static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002438string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002439{
Fred Drakeba096332000-07-09 07:04:36 +00002440 register const unsigned char *p
2441 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002442 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002443
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 /* Shortcut for single character strings */
2445 if (PyString_GET_SIZE(self) == 1 &&
2446 isdigit(*p))
2447 return PyInt_FromLong(1);
2448
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002449 /* Special case for empty strings */
2450 if (PyString_GET_SIZE(self) == 0)
2451 return PyInt_FromLong(0);
2452
Guido van Rossum4c08d552000-03-10 22:55:18 +00002453 e = p + PyString_GET_SIZE(self);
2454 for (; p < e; p++) {
2455 if (!isdigit(*p))
2456 return PyInt_FromLong(0);
2457 }
2458 return PyInt_FromLong(1);
2459}
2460
2461
2462static char islower__doc__[] =
2463"S.islower() -> int\n\
2464\n\
2465Return 1 if all cased characters in S are lowercase and there is\n\
2466at least one cased character in S, 0 otherwise.";
2467
2468static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002469string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470{
Fred Drakeba096332000-07-09 07:04:36 +00002471 register const unsigned char *p
2472 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002473 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002474 int cased;
2475
Guido van Rossum4c08d552000-03-10 22:55:18 +00002476 /* Shortcut for single character strings */
2477 if (PyString_GET_SIZE(self) == 1)
2478 return PyInt_FromLong(islower(*p) != 0);
2479
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002480 /* Special case for empty strings */
2481 if (PyString_GET_SIZE(self) == 0)
2482 return PyInt_FromLong(0);
2483
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484 e = p + PyString_GET_SIZE(self);
2485 cased = 0;
2486 for (; p < e; p++) {
2487 if (isupper(*p))
2488 return PyInt_FromLong(0);
2489 else if (!cased && islower(*p))
2490 cased = 1;
2491 }
2492 return PyInt_FromLong(cased);
2493}
2494
2495
2496static char isupper__doc__[] =
2497"S.isupper() -> int\n\
2498\n\
2499Return 1 if all cased characters in S are uppercase and there is\n\
2500at least one cased character in S, 0 otherwise.";
2501
2502static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002503string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504{
Fred Drakeba096332000-07-09 07:04:36 +00002505 register const unsigned char *p
2506 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002507 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002508 int cased;
2509
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510 /* Shortcut for single character strings */
2511 if (PyString_GET_SIZE(self) == 1)
2512 return PyInt_FromLong(isupper(*p) != 0);
2513
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002514 /* Special case for empty strings */
2515 if (PyString_GET_SIZE(self) == 0)
2516 return PyInt_FromLong(0);
2517
Guido van Rossum4c08d552000-03-10 22:55:18 +00002518 e = p + PyString_GET_SIZE(self);
2519 cased = 0;
2520 for (; p < e; p++) {
2521 if (islower(*p))
2522 return PyInt_FromLong(0);
2523 else if (!cased && isupper(*p))
2524 cased = 1;
2525 }
2526 return PyInt_FromLong(cased);
2527}
2528
2529
2530static char istitle__doc__[] =
2531"S.istitle() -> int\n\
2532\n\
2533Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2534may only follow uncased characters and lowercase characters only cased\n\
2535ones. Return 0 otherwise.";
2536
2537static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002538string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002539{
Fred Drakeba096332000-07-09 07:04:36 +00002540 register const unsigned char *p
2541 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002542 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002543 int cased, previous_is_cased;
2544
Guido van Rossum4c08d552000-03-10 22:55:18 +00002545 /* Shortcut for single character strings */
2546 if (PyString_GET_SIZE(self) == 1)
2547 return PyInt_FromLong(isupper(*p) != 0);
2548
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002549 /* Special case for empty strings */
2550 if (PyString_GET_SIZE(self) == 0)
2551 return PyInt_FromLong(0);
2552
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553 e = p + PyString_GET_SIZE(self);
2554 cased = 0;
2555 previous_is_cased = 0;
2556 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002557 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558
2559 if (isupper(ch)) {
2560 if (previous_is_cased)
2561 return PyInt_FromLong(0);
2562 previous_is_cased = 1;
2563 cased = 1;
2564 }
2565 else if (islower(ch)) {
2566 if (!previous_is_cased)
2567 return PyInt_FromLong(0);
2568 previous_is_cased = 1;
2569 cased = 1;
2570 }
2571 else
2572 previous_is_cased = 0;
2573 }
2574 return PyInt_FromLong(cased);
2575}
2576
2577
2578static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002579"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580\n\
2581Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002582Line breaks are not included in the resulting list unless keepends\n\
2583is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584
2585#define SPLIT_APPEND(data, left, right) \
2586 str = PyString_FromStringAndSize(data + left, right - left); \
2587 if (!str) \
2588 goto onError; \
2589 if (PyList_Append(list, str)) { \
2590 Py_DECREF(str); \
2591 goto onError; \
2592 } \
2593 else \
2594 Py_DECREF(str);
2595
2596static PyObject*
2597string_splitlines(PyStringObject *self, PyObject *args)
2598{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 register int i;
2600 register int j;
2601 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002602 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002603 PyObject *list;
2604 PyObject *str;
2605 char *data;
2606
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002607 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002608 return NULL;
2609
2610 data = PyString_AS_STRING(self);
2611 len = PyString_GET_SIZE(self);
2612
Guido van Rossum4c08d552000-03-10 22:55:18 +00002613 list = PyList_New(0);
2614 if (!list)
2615 goto onError;
2616
2617 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002618 int eol;
2619
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 /* Find a line and append it */
2621 while (i < len && data[i] != '\n' && data[i] != '\r')
2622 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002623
2624 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002625 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626 if (i < len) {
2627 if (data[i] == '\r' && i + 1 < len &&
2628 data[i+1] == '\n')
2629 i += 2;
2630 else
2631 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002632 if (keepends)
2633 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002634 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002635 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002636 j = i;
2637 }
2638 if (j < len) {
2639 SPLIT_APPEND(data, j, len);
2640 }
2641
2642 return list;
2643
2644 onError:
2645 Py_DECREF(list);
2646 return NULL;
2647}
2648
2649#undef SPLIT_APPEND
2650
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002652static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002653string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002654 /* Counterparts of the obsolete stropmodule functions; except
2655 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002656 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2657 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2658 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2659 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2660 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2661 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2662 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2663 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2664 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2665 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2666 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2667 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2668 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2669 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2670 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2671 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2672 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2673 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2674 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2675 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2676 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2677 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2678 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2679 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2680 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2681 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2682 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2683 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2684 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2685 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2686 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2687 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2688 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002689#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002690 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002691#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002692 {NULL, NULL} /* sentinel */
2693};
2694
Guido van Rossumae960af2001-08-30 03:11:59 +00002695staticforward PyObject *
2696str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2697
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002698static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002699string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002700{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002701 PyObject *x = NULL;
2702 static char *kwlist[] = {"object", 0};
2703
Guido van Rossumae960af2001-08-30 03:11:59 +00002704 if (type != &PyString_Type)
2705 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002706 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2707 return NULL;
2708 if (x == NULL)
2709 return PyString_FromString("");
2710 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002711}
2712
Guido van Rossumae960af2001-08-30 03:11:59 +00002713static PyObject *
2714str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2715{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002716 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002717 int n;
2718
2719 assert(PyType_IsSubtype(type, &PyString_Type));
2720 tmp = string_new(&PyString_Type, args, kwds);
2721 if (tmp == NULL)
2722 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002723 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002724 n = PyString_GET_SIZE(tmp);
2725 pnew = type->tp_alloc(type, n);
2726 if (pnew != NULL) {
2727 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2728#ifdef CACHE_HASH
2729 ((PyStringObject *)pnew)->ob_shash =
2730 ((PyStringObject *)tmp)->ob_shash;
2731#endif
2732#ifdef INTERN_STRINGS
2733 ((PyStringObject *)pnew)->ob_sinterned =
2734 ((PyStringObject *)tmp)->ob_sinterned;
2735#endif
2736 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002737 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002738 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002739}
2740
Tim Peters6d6c1a32001-08-02 04:15:00 +00002741static char string_doc[] =
2742"str(object) -> string\n\
2743\n\
2744Return a nice string representation of the object.\n\
2745If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002746
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002747PyTypeObject PyString_Type = {
2748 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002749 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002750 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002751 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002752 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002753 (destructor)string_dealloc, /* tp_dealloc */
2754 (printfunc)string_print, /* tp_print */
2755 0, /* tp_getattr */
2756 0, /* tp_setattr */
2757 0, /* tp_compare */
2758 (reprfunc)string_repr, /* tp_repr */
2759 0, /* tp_as_number */
2760 &string_as_sequence, /* tp_as_sequence */
2761 0, /* tp_as_mapping */
2762 (hashfunc)string_hash, /* tp_hash */
2763 0, /* tp_call */
2764 (reprfunc)string_str, /* tp_str */
2765 PyObject_GenericGetAttr, /* tp_getattro */
2766 0, /* tp_setattro */
2767 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002768 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002769 string_doc, /* tp_doc */
2770 0, /* tp_traverse */
2771 0, /* tp_clear */
2772 (richcmpfunc)string_richcompare, /* tp_richcompare */
2773 0, /* tp_weaklistoffset */
2774 0, /* tp_iter */
2775 0, /* tp_iternext */
2776 string_methods, /* tp_methods */
2777 0, /* tp_members */
2778 0, /* tp_getset */
2779 0, /* tp_base */
2780 0, /* tp_dict */
2781 0, /* tp_descr_get */
2782 0, /* tp_descr_set */
2783 0, /* tp_dictoffset */
2784 0, /* tp_init */
2785 0, /* tp_alloc */
2786 string_new, /* tp_new */
Guido van Rossum9475a232001-10-05 20:51:39 +00002787 _PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002788};
2789
2790void
Fred Drakeba096332000-07-09 07:04:36 +00002791PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002792{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002793 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002794 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002795 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002796 if (w == NULL || !PyString_Check(*pv)) {
2797 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002798 *pv = NULL;
2799 return;
2800 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002801 v = string_concat((PyStringObject *) *pv, w);
2802 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803 *pv = v;
2804}
2805
Guido van Rossum013142a1994-08-30 08:19:36 +00002806void
Fred Drakeba096332000-07-09 07:04:36 +00002807PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002808{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002809 PyString_Concat(pv, w);
2810 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002811}
2812
2813
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002814/* The following function breaks the notion that strings are immutable:
2815 it changes the size of a string. We get away with this only if there
2816 is only one module referencing the object. You can also think of it
2817 as creating a new string object and destroying the old one, only
2818 more efficiently. In any case, don't use this if the string may
2819 already be known to some other part of the code... */
2820
2821int
Fred Drakeba096332000-07-09 07:04:36 +00002822_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002823{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 register PyObject *v;
2825 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002826 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002827 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002828 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002829 Py_DECREF(v);
2830 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002831 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002832 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002833 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002834#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002835 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002836#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002837 _Py_ForgetReference(v);
2838 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002839 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002840 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002841 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002842 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002843 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002844 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002845 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002846 _Py_NewReference(*pv);
2847 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002848 sv->ob_size = newsize;
2849 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002850 return 0;
2851}
Guido van Rossume5372401993-03-16 12:15:04 +00002852
2853/* Helpers for formatstring */
2854
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002855static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002856getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002857{
2858 int argidx = *p_argidx;
2859 if (argidx < arglen) {
2860 (*p_argidx)++;
2861 if (arglen < 0)
2862 return args;
2863 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002864 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002865 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002866 PyErr_SetString(PyExc_TypeError,
2867 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002868 return NULL;
2869}
2870
/* Bit flags for the format codes; each flag character of a conversion
 * specifier has its own bit so flags can be OR-ed together:
 *	F_LJUST	'-'
 *	F_SIGN	'+'
 *	F_BLANK	' '
 *	F_ALT	'#'
 *	F_ZERO	'0'
 */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
2883
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002884static int
Fred Drakeba096332000-07-09 07:04:36 +00002885formatfloat(char *buf, size_t buflen, int flags,
2886 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002887{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002888 /* fmt = '%#.' + `prec` + `type`
2889 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002890 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002891 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002892 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002893 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002894 if (prec < 0)
2895 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002896 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2897 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002898 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2899 (flags&F_ALT) ? "#" : "",
2900 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002901 /* worst case length calc to ensure no buffer overrun:
2902 fmt = %#.<prec>g
2903 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002904 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002905 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2906 If prec=0 the effective precision is 1 (the leading digit is
2907 always given), therefore increase by one to 10+prec. */
2908 if (buflen <= (size_t)10 + (size_t)prec) {
2909 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002910 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002911 return -1;
2912 }
Tim Peters885d4572001-11-28 20:27:42 +00002913 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002914 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002915}
2916
Tim Peters38fd5b62000-09-21 05:43:11 +00002917/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2918 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2919 * Python's regular ints.
2920 * Return value: a new PyString*, or NULL if error.
2921 * . *pbuf is set to point into it,
2922 * *plen set to the # of chars following that.
2923 * Caller must decref it when done using pbuf.
2924 * The string starting at *pbuf is of the form
2925 * "-"? ("0x" | "0X")? digit+
2926 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002927 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002928 * There will be at least prec digits, zero-filled on the left if
2929 * necessary to get that many.
2930 * val object to be converted
2931 * flags bitmask of format flags; only F_ALT is looked at
2932 * prec minimum number of digits; 0-fill on left if needed
2933 * type a character in [duoxX]; u acts the same as d
2934 *
2935 * CAUTION: o, x and X conversions on regular ints can never
2936 * produce a '-' sign, but can for Python's unbounded ints.
2937 */
2938PyObject*
2939_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2940 char **pbuf, int *plen)
2941{
2942 PyObject *result = NULL;
2943 char *buf;
2944 int i;
2945 int sign; /* 1 if '-', else 0 */
2946 int len; /* number of characters */
2947 int numdigits; /* len == numnondigits + numdigits */
2948 int numnondigits = 0;
2949
2950 switch (type) {
2951 case 'd':
2952 case 'u':
2953 result = val->ob_type->tp_str(val);
2954 break;
2955 case 'o':
2956 result = val->ob_type->tp_as_number->nb_oct(val);
2957 break;
2958 case 'x':
2959 case 'X':
2960 numnondigits = 2;
2961 result = val->ob_type->tp_as_number->nb_hex(val);
2962 break;
2963 default:
2964 assert(!"'type' not in [duoxX]");
2965 }
2966 if (!result)
2967 return NULL;
2968
2969 /* To modify the string in-place, there can only be one reference. */
2970 if (result->ob_refcnt != 1) {
2971 PyErr_BadInternalCall();
2972 return NULL;
2973 }
2974 buf = PyString_AsString(result);
2975 len = PyString_Size(result);
2976 if (buf[len-1] == 'L') {
2977 --len;
2978 buf[len] = '\0';
2979 }
2980 sign = buf[0] == '-';
2981 numnondigits += sign;
2982 numdigits = len - numnondigits;
2983 assert(numdigits > 0);
2984
Tim Petersfff53252001-04-12 18:38:48 +00002985 /* Get rid of base marker unless F_ALT */
2986 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002987 /* Need to skip 0x, 0X or 0. */
2988 int skipped = 0;
2989 switch (type) {
2990 case 'o':
2991 assert(buf[sign] == '0');
2992 /* If 0 is only digit, leave it alone. */
2993 if (numdigits > 1) {
2994 skipped = 1;
2995 --numdigits;
2996 }
2997 break;
2998 case 'x':
2999 case 'X':
3000 assert(buf[sign] == '0');
3001 assert(buf[sign + 1] == 'x');
3002 skipped = 2;
3003 numnondigits -= 2;
3004 break;
3005 }
3006 if (skipped) {
3007 buf += skipped;
3008 len -= skipped;
3009 if (sign)
3010 buf[0] = '-';
3011 }
3012 assert(len == numnondigits + numdigits);
3013 assert(numdigits > 0);
3014 }
3015
3016 /* Fill with leading zeroes to meet minimum width. */
3017 if (prec > numdigits) {
3018 PyObject *r1 = PyString_FromStringAndSize(NULL,
3019 numnondigits + prec);
3020 char *b1;
3021 if (!r1) {
3022 Py_DECREF(result);
3023 return NULL;
3024 }
3025 b1 = PyString_AS_STRING(r1);
3026 for (i = 0; i < numnondigits; ++i)
3027 *b1++ = *buf++;
3028 for (i = 0; i < prec - numdigits; i++)
3029 *b1++ = '0';
3030 for (i = 0; i < numdigits; i++)
3031 *b1++ = *buf++;
3032 *b1 = '\0';
3033 Py_DECREF(result);
3034 result = r1;
3035 buf = PyString_AS_STRING(result);
3036 len = numnondigits + prec;
3037 }
3038
3039 /* Fix up case for hex conversions. */
3040 switch (type) {
3041 case 'x':
3042 /* Need to convert all upper case letters to lower case. */
3043 for (i = 0; i < len; i++)
3044 if (buf[i] >= 'A' && buf[i] <= 'F')
3045 buf[i] += 'a'-'A';
3046 break;
3047 case 'X':
3048 /* Need to convert 0x to 0X (and -0x to -0X). */
3049 if (buf[sign + 1] == 'x')
3050 buf[sign + 1] = 'X';
3051 break;
3052 }
3053 *pbuf = buf;
3054 *plen = len;
3055 return result;
3056}
3057
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003058static int
Fred Drakeba096332000-07-09 07:04:36 +00003059formatint(char *buf, size_t buflen, int flags,
3060 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003061{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003062 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003063 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3064 + 1 + 1 = 24 */
3065 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003066 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003067 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003068 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003069 if (prec < 0)
3070 prec = 1;
Tim Peters885d4572001-11-28 20:27:42 +00003071 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3072 (flags&F_ALT) ? "#" : "",
3073 prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003074 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003075 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003076 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003077 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003078 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003079 return -1;
3080 }
Tim Peters885d4572001-11-28 20:27:42 +00003081 PyOS_snprintf(buf, buflen, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003082 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3083 * but we want it (for consistency with other %#x conversions, and
3084 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003085 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3086 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3087 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003088 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003089 if (x == 0 &&
3090 (flags & F_ALT) &&
3091 (type == 'x' || type == 'X') &&
3092 buf[1] != (char)type) /* this last always true under std C */
3093 {
Tim Petersfff53252001-04-12 18:38:48 +00003094 memmove(buf+2, buf, strlen(buf) + 1);
3095 buf[0] = '0';
3096 buf[1] = (char)type;
3097 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003098 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003099}
3100
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003101static int
Fred Drakeba096332000-07-09 07:04:36 +00003102formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003103{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003104 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003105 if (PyString_Check(v)) {
3106 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003107 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003108 }
3109 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003110 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003111 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003112 }
3113 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003114 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003115}
3116
Guido van Rossum013142a1994-08-30 08:19:36 +00003117
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast cannot combine
   with neighbouring operators at the point of use.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003127
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003128PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003129PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003130{
3131 char *fmt, *res;
3132 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003133 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003134 PyObject *result, *orig_args;
3135#ifdef Py_USING_UNICODE
3136 PyObject *v, *w;
3137#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003138 PyObject *dict = NULL;
3139 if (format == NULL || !PyString_Check(format) || args == NULL) {
3140 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003141 return NULL;
3142 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003143 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003144 fmt = PyString_AsString(format);
3145 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003146 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003147 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003148 if (result == NULL)
3149 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003150 res = PyString_AsString(result);
3151 if (PyTuple_Check(args)) {
3152 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003153 argidx = 0;
3154 }
3155 else {
3156 arglen = -1;
3157 argidx = -2;
3158 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003159 if (args->ob_type->tp_as_mapping)
3160 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003161 while (--fmtcnt >= 0) {
3162 if (*fmt != '%') {
3163 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003164 rescnt = fmtcnt + 100;
3165 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003166 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003167 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003168 res = PyString_AsString(result)
3169 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003170 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003171 }
3172 *res++ = *fmt++;
3173 }
3174 else {
3175 /* Got a format specifier */
3176 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003177 int width = -1;
3178 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003179 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003180 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003181 PyObject *v = NULL;
3182 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003183 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003184 int sign;
3185 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003186 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003187#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003188 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003189 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003190#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003191
Guido van Rossumda9c2711996-12-05 21:58:58 +00003192 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003193 if (*fmt == '(') {
3194 char *keystart;
3195 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003196 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003197 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003198
3199 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003200 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003201 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003202 goto error;
3203 }
3204 ++fmt;
3205 --fmtcnt;
3206 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003207 /* Skip over balanced parentheses */
3208 while (pcount > 0 && --fmtcnt >= 0) {
3209 if (*fmt == ')')
3210 --pcount;
3211 else if (*fmt == '(')
3212 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003213 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003214 }
3215 keylen = fmt - keystart - 1;
3216 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003217 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003218 "incomplete format key");
3219 goto error;
3220 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003221 key = PyString_FromStringAndSize(keystart,
3222 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003223 if (key == NULL)
3224 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003225 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003226 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003227 args_owned = 0;
3228 }
3229 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003230 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003231 if (args == NULL) {
3232 goto error;
3233 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003234 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003235 arglen = -1;
3236 argidx = -2;
3237 }
Guido van Rossume5372401993-03-16 12:15:04 +00003238 while (--fmtcnt >= 0) {
3239 switch (c = *fmt++) {
3240 case '-': flags |= F_LJUST; continue;
3241 case '+': flags |= F_SIGN; continue;
3242 case ' ': flags |= F_BLANK; continue;
3243 case '#': flags |= F_ALT; continue;
3244 case '0': flags |= F_ZERO; continue;
3245 }
3246 break;
3247 }
3248 if (c == '*') {
3249 v = getnextarg(args, arglen, &argidx);
3250 if (v == NULL)
3251 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003252 if (!PyInt_Check(v)) {
3253 PyErr_SetString(PyExc_TypeError,
3254 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003255 goto error;
3256 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003257 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003258 if (width < 0) {
3259 flags |= F_LJUST;
3260 width = -width;
3261 }
Guido van Rossume5372401993-03-16 12:15:04 +00003262 if (--fmtcnt >= 0)
3263 c = *fmt++;
3264 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003265 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003266 width = c - '0';
3267 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003268 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003269 if (!isdigit(c))
3270 break;
3271 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003272 PyErr_SetString(
3273 PyExc_ValueError,
3274 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003275 goto error;
3276 }
3277 width = width*10 + (c - '0');
3278 }
3279 }
3280 if (c == '.') {
3281 prec = 0;
3282 if (--fmtcnt >= 0)
3283 c = *fmt++;
3284 if (c == '*') {
3285 v = getnextarg(args, arglen, &argidx);
3286 if (v == NULL)
3287 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003288 if (!PyInt_Check(v)) {
3289 PyErr_SetString(
3290 PyExc_TypeError,
3291 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003292 goto error;
3293 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003294 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003295 if (prec < 0)
3296 prec = 0;
3297 if (--fmtcnt >= 0)
3298 c = *fmt++;
3299 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003300 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003301 prec = c - '0';
3302 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003303 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003304 if (!isdigit(c))
3305 break;
3306 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003307 PyErr_SetString(
3308 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003309 "prec too big");
3310 goto error;
3311 }
3312 prec = prec*10 + (c - '0');
3313 }
3314 }
3315 } /* prec */
3316 if (fmtcnt >= 0) {
3317 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003318 if (--fmtcnt >= 0)
3319 c = *fmt++;
3320 }
3321 }
3322 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003323 PyErr_SetString(PyExc_ValueError,
3324 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003325 goto error;
3326 }
3327 if (c != '%') {
3328 v = getnextarg(args, arglen, &argidx);
3329 if (v == NULL)
3330 goto error;
3331 }
3332 sign = 0;
3333 fill = ' ';
3334 switch (c) {
3335 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003336 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003337 len = 1;
3338 break;
3339 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003340 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003341#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003342 if (PyUnicode_Check(v)) {
3343 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003344 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003345 goto unicode;
3346 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003347#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003348 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003349 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003350 else
3351 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003352 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003353 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003354 if (!PyString_Check(temp)) {
3355 PyErr_SetString(PyExc_TypeError,
3356 "%s argument has non-string str()");
3357 goto error;
3358 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003359 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003360 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003361 if (prec >= 0 && len > prec)
3362 len = prec;
3363 break;
3364 case 'i':
3365 case 'd':
3366 case 'u':
3367 case 'o':
3368 case 'x':
3369 case 'X':
3370 if (c == 'i')
3371 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003372 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003373 temp = _PyString_FormatLong(v, flags,
3374 prec, c, &pbuf, &len);
3375 if (!temp)
3376 goto error;
3377 /* unbounded ints can always produce
3378 a sign character! */
3379 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003380 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003381 else {
3382 pbuf = formatbuf;
3383 len = formatint(pbuf, sizeof(formatbuf),
3384 flags, prec, c, v);
3385 if (len < 0)
3386 goto error;
3387 /* only d conversion is signed */
3388 sign = c == 'd';
3389 }
3390 if (flags & F_ZERO)
3391 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003392 break;
3393 case 'e':
3394 case 'E':
3395 case 'f':
3396 case 'g':
3397 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003398 pbuf = formatbuf;
3399 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003400 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003401 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003402 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003403 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003404 fill = '0';
3405 break;
3406 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003407 pbuf = formatbuf;
3408 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003409 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003410 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003411 break;
3412 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003413 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003414 "unsupported format character '%c' (0x%x) "
3415 "at index %i",
3416 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003417 goto error;
3418 }
3419 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003420 if (*pbuf == '-' || *pbuf == '+') {
3421 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003422 len--;
3423 }
3424 else if (flags & F_SIGN)
3425 sign = '+';
3426 else if (flags & F_BLANK)
3427 sign = ' ';
3428 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003429 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003430 }
3431 if (width < len)
3432 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003433 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003434 reslen -= rescnt;
3435 rescnt = width + fmtcnt + 100;
3436 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003437 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003438 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003439 res = PyString_AsString(result)
3440 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003441 }
3442 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003443 if (fill != ' ')
3444 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003445 rescnt--;
3446 if (width > len)
3447 width--;
3448 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003449 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3450 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003451 assert(pbuf[1] == c);
3452 if (fill != ' ') {
3453 *res++ = *pbuf++;
3454 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003455 }
Tim Petersfff53252001-04-12 18:38:48 +00003456 rescnt -= 2;
3457 width -= 2;
3458 if (width < 0)
3459 width = 0;
3460 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003461 }
3462 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003463 do {
3464 --rescnt;
3465 *res++ = fill;
3466 } while (--width > len);
3467 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003468 if (fill == ' ') {
3469 if (sign)
3470 *res++ = sign;
3471 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003472 (c == 'x' || c == 'X')) {
3473 assert(pbuf[0] == '0');
3474 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003475 *res++ = *pbuf++;
3476 *res++ = *pbuf++;
3477 }
3478 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003479 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003480 res += len;
3481 rescnt -= len;
3482 while (--width >= len) {
3483 --rescnt;
3484 *res++ = ' ';
3485 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003486 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003487 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003488 "not all arguments converted");
3489 goto error;
3490 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003491 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003492 } /* '%' */
3493 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003494 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003495 PyErr_SetString(PyExc_TypeError,
3496 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003497 goto error;
3498 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003499 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003500 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003501 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003502 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003503 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003504
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003505#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003506 unicode:
3507 if (args_owned) {
3508 Py_DECREF(args);
3509 args_owned = 0;
3510 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003511 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003512 if (PyTuple_Check(orig_args) && argidx > 0) {
3513 PyObject *v;
3514 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3515 v = PyTuple_New(n);
3516 if (v == NULL)
3517 goto error;
3518 while (--n >= 0) {
3519 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3520 Py_INCREF(w);
3521 PyTuple_SET_ITEM(v, n, w);
3522 }
3523 args = v;
3524 } else {
3525 Py_INCREF(orig_args);
3526 args = orig_args;
3527 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003528 args_owned = 1;
3529 /* Take what we have of the result and let the Unicode formatting
3530 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003531 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003532 if (_PyString_Resize(&result, rescnt))
3533 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003534 fmtcnt = PyString_GET_SIZE(format) - \
3535 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003536 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3537 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003538 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003539 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003540 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003541 if (v == NULL)
3542 goto error;
3543 /* Paste what we have (result) to what the Unicode formatting
3544 function returned (v) and return the result (or error) */
3545 w = PyUnicode_Concat(result, v);
3546 Py_DECREF(result);
3547 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003548 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003549 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003550#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003551
Guido van Rossume5372401993-03-16 12:15:04 +00003552 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003553 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003554 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003555 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003556 }
Guido van Rossume5372401993-03-16 12:15:04 +00003557 return NULL;
3558}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003559
3560
3561#ifdef INTERN_STRINGS
3562
Barry Warsaw4df762f2000-08-16 23:41:01 +00003563/* This dictionary will leak at PyString_Fini() time. That's acceptable
3564 * because PyString_Fini() specifically frees interned strings that are
3565 * only referenced by this dictionary. The CVS log entry for revision 2.45
3566 * says:
3567 *
3568 * Change the Fini function to only remove otherwise unreferenced
3569 * strings from the interned table. There are references in
3570 * hard-to-find static variables all over the interpreter, and it's not
3571 * worth trying to get rid of all those; but "uninterning" isn't fair
3572 * either and may cause subtle failures later -- so we have to keep them
3573 * in the interned table.
3574 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003575static PyObject *interned;
3576
3577void
Fred Drakeba096332000-07-09 07:04:36 +00003578PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003579{
3580 register PyStringObject *s = (PyStringObject *)(*p);
3581 PyObject *t;
3582 if (s == NULL || !PyString_Check(s))
3583 Py_FatalError("PyString_InternInPlace: strings only please!");
3584 if ((t = s->ob_sinterned) != NULL) {
3585 if (t == (PyObject *)s)
3586 return;
3587 Py_INCREF(t);
3588 *p = t;
3589 Py_DECREF(s);
3590 return;
3591 }
3592 if (interned == NULL) {
3593 interned = PyDict_New();
3594 if (interned == NULL)
3595 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003596 }
3597 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3598 Py_INCREF(t);
3599 *p = s->ob_sinterned = t;
3600 Py_DECREF(s);
3601 return;
3602 }
Tim Peters111f6092001-09-12 07:54:51 +00003603 /* Ensure that only true string objects appear in the intern dict,
3604 and as the value of ob_sinterned. */
3605 if (PyString_CheckExact(s)) {
3606 t = (PyObject *)s;
3607 if (PyDict_SetItem(interned, t, t) == 0) {
3608 s->ob_sinterned = t;
3609 return;
3610 }
3611 }
3612 else {
3613 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3614 PyString_GET_SIZE(s));
3615 if (t != NULL) {
3616 if (PyDict_SetItem(interned, t, t) == 0) {
3617 *p = s->ob_sinterned = t;
3618 Py_DECREF(s);
3619 return;
3620 }
3621 Py_DECREF(t);
3622 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003623 }
3624 PyErr_Clear();
3625}
3626
3627
3628PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003629PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003630{
3631 PyObject *s = PyString_FromString(cp);
3632 if (s == NULL)
3633 return NULL;
3634 PyString_InternInPlace(&s);
3635 return s;
3636}
3637
3638#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003639
3640void
Fred Drakeba096332000-07-09 07:04:36 +00003641PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003642{
3643 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003644 for (i = 0; i < UCHAR_MAX + 1; i++) {
3645 Py_XDECREF(characters[i]);
3646 characters[i] = NULL;
3647 }
3648#ifndef DONT_SHARE_SHORT_STRINGS
3649 Py_XDECREF(nullstring);
3650 nullstring = NULL;
3651#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003652#ifdef INTERN_STRINGS
3653 if (interned) {
3654 int pos, changed;
3655 PyObject *key, *value;
3656 do {
3657 changed = 0;
3658 pos = 0;
3659 while (PyDict_Next(interned, &pos, &key, &value)) {
3660 if (key->ob_refcnt == 2 && key == value) {
3661 PyDict_DelItem(interned, key);
3662 changed = 1;
3663 }
3664 }
3665 } while (changed);
3666 }
3667#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003668}
Barry Warsawa903ad982001-02-23 16:40:48 +00003669
3670#ifdef INTERN_STRINGS
3671void _Py_ReleaseInternedStrings(void)
3672{
3673 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003674 fprintf(stderr, "releasing interned strings\n");
3675 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003676 Py_DECREF(interned);
3677 interned = NULL;
3678 }
3679}
3680#endif /* INTERN_STRINGS */