blob: 74c4b5206e37d72d03e3dad606ebeb99a8517452 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
22 Newsizedstringobject() and newstringobject() try in certain cases
23 to share string objects. When the size of the string is zero,
24 these routines always return a pointer to the same string object;
25 when the size is one, they return a pointer to an already existing
26 object if the contents of the string is known. For
27 newstringobject() this is always the case, for
28 newsizedstringobject() this is the case when the first argument is
29 not NULL.
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the object was obtained from
33 newsizedstringobject() with a NULL first argument, because in the
34 future these routines may try to do even more sharing of objects.
35*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
Tim Petersc15c4f12001-10-02 21:32:07 +0000153 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
Tim Petersc15c4f12001-10-02 21:32:07 +0000159#ifdef VA_LIST_IS_ARRAY
160 memcpy(count, vargs, sizeof(va_list));
161#else
162 count = vargs;
163#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000164 /* step 1: figure out how large a buffer we need */
165 for (f = format; *f; f++) {
166 if (*f == '%') {
167 const char* p = f;
168 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
169 ;
170
171 /* skip the 'l' in %ld, since it doesn't change the
172 width. although only %d is supported (see
173 "expand" section below), others can be easily
174 add */
175 if (*f == 'l' && *(f+1) == 'd')
176 ++f;
177
178 switch (*f) {
179 case 'c':
180 (void)va_arg(count, int);
181 /* fall through... */
182 case '%':
183 n++;
184 break;
185 case 'd': case 'i': case 'x':
186 (void) va_arg(count, int);
187 /* 20 bytes should be enough to hold a 64-bit
188 integer */
189 n += 20;
190 break;
191 case 's':
192 s = va_arg(count, char*);
193 n += strlen(s);
194 break;
195 case 'p':
196 (void) va_arg(count, int);
197 /* maximum 64-bit pointer representation:
198 * 0xffffffffffffffff
199 * so 19 characters is enough.
200 */
201 n += 19;
202 break;
203 default:
204 /* if we stumble upon an unknown
205 formatting code, copy the rest of
206 the format string to the output
207 string. (we cannot just skip the
208 code, since there's no way to know
209 what's in the argument list) */
210 n += strlen(p);
211 goto expand;
212 }
213 } else
214 n++;
215 }
216 expand:
217 /* step 2: fill the buffer */
218 string = PyString_FromStringAndSize(NULL, n);
219 if (!string)
220 return NULL;
221
222 s = PyString_AsString(string);
223
224 for (f = format; *f; f++) {
225 if (*f == '%') {
226 const char* p = f++;
227 int i, longflag = 0;
228 /* parse the width.precision part (we're only
229 interested in the precision value, if any) */
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 if (*f == '.') {
234 f++;
235 n = 0;
236 while (isdigit(Py_CHARMASK(*f)))
237 n = (n*10) + *f++ - '0';
238 }
239 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
240 f++;
241 /* handle the long flag, but only for %ld. others
242 can be added when necessary. */
243 if (*f == 'l' && *(f+1) == 'd') {
244 longflag = 1;
245 ++f;
246 }
247
248 switch (*f) {
249 case 'c':
250 *s++ = va_arg(vargs, int);
251 break;
252 case 'd':
253 if (longflag)
254 sprintf(s, "%ld", va_arg(vargs, long));
255 else
256 sprintf(s, "%d", va_arg(vargs, int));
257 s += strlen(s);
258 break;
259 case 'i':
260 sprintf(s, "%i", va_arg(vargs, int));
261 s += strlen(s);
262 break;
263 case 'x':
264 sprintf(s, "%x", va_arg(vargs, int));
265 s += strlen(s);
266 break;
267 case 's':
268 p = va_arg(vargs, char*);
269 i = strlen(p);
270 if (n > 0 && i > n)
271 i = n;
272 memcpy(s, p, i);
273 s += i;
274 break;
275 case 'p':
276 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000277 /* %p is ill-defined: ensure leading 0x. */
278 if (s[1] == 'X')
279 s[1] = 'x';
280 else if (s[1] != 'x') {
281 memmove(s+2, s, strlen(s)+1);
282 s[0] = '0';
283 s[1] = 'x';
284 }
Barry Warsawdadace02001-08-24 18:32:06 +0000285 s += strlen(s);
286 break;
287 case '%':
288 *s++ = '%';
289 break;
290 default:
291 strcpy(s, p);
292 s += strlen(s);
293 goto end;
294 }
295 } else
296 *s++ = *f;
297 }
298
299 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000300 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000301 return string;
302}
303
304PyObject *
305PyString_FromFormat(const char *format, ...)
306{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000307 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000308 va_list vargs;
309
310#ifdef HAVE_STDARG_PROTOTYPES
311 va_start(vargs, format);
312#else
313 va_start(vargs);
314#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000315 ret = PyString_FromFormatV(format, vargs);
316 va_end(vargs);
317 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000318}
319
320
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000321PyObject *PyString_Decode(const char *s,
322 int size,
323 const char *encoding,
324 const char *errors)
325{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000326 PyObject *v, *str;
327
328 str = PyString_FromStringAndSize(s, size);
329 if (str == NULL)
330 return NULL;
331 v = PyString_AsDecodedString(str, encoding, errors);
332 Py_DECREF(str);
333 return v;
334}
335
336PyObject *PyString_AsDecodedObject(PyObject *str,
337 const char *encoding,
338 const char *errors)
339{
340 PyObject *v;
341
342 if (!PyString_Check(str)) {
343 PyErr_BadArgument();
344 goto onError;
345 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000346
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000347 if (encoding == NULL) {
348#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000349 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000350#else
351 PyErr_SetString(PyExc_ValueError, "no encoding specified");
352 goto onError;
353#endif
354 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355
356 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000357 v = PyCodec_Decode(str, encoding, errors);
358 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360
361 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000363 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000364 return NULL;
365}
366
367PyObject *PyString_AsDecodedString(PyObject *str,
368 const char *encoding,
369 const char *errors)
370{
371 PyObject *v;
372
373 v = PyString_AsDecodedObject(str, encoding, errors);
374 if (v == NULL)
375 goto onError;
376
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000377#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000378 /* Convert Unicode to a string using the default encoding */
379 if (PyUnicode_Check(v)) {
380 PyObject *temp = v;
381 v = PyUnicode_AsEncodedString(v, NULL, NULL);
382 Py_DECREF(temp);
383 if (v == NULL)
384 goto onError;
385 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000386#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000387 if (!PyString_Check(v)) {
388 PyErr_Format(PyExc_TypeError,
389 "decoder did not return a string object (type=%.400s)",
390 v->ob_type->tp_name);
391 Py_DECREF(v);
392 goto onError;
393 }
394
395 return v;
396
397 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 return NULL;
399}
400
401PyObject *PyString_Encode(const char *s,
402 int size,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000407
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000408 str = PyString_FromStringAndSize(s, size);
409 if (str == NULL)
410 return NULL;
411 v = PyString_AsEncodedString(str, encoding, errors);
412 Py_DECREF(str);
413 return v;
414}
415
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000416PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 const char *encoding,
418 const char *errors)
419{
420 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000421
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 if (!PyString_Check(str)) {
423 PyErr_BadArgument();
424 goto onError;
425 }
426
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000427 if (encoding == NULL) {
428#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000429 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000430#else
431 PyErr_SetString(PyExc_ValueError, "no encoding specified");
432 goto onError;
433#endif
434 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435
436 /* Encode via the codec registry */
437 v = PyCodec_Encode(str, encoding, errors);
438 if (v == NULL)
439 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000440
441 return v;
442
443 onError:
444 return NULL;
445}
446
447PyObject *PyString_AsEncodedString(PyObject *str,
448 const char *encoding,
449 const char *errors)
450{
451 PyObject *v;
452
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000453 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000454 if (v == NULL)
455 goto onError;
456
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000457#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000458 /* Convert Unicode to a string using the default encoding */
459 if (PyUnicode_Check(v)) {
460 PyObject *temp = v;
461 v = PyUnicode_AsEncodedString(v, NULL, NULL);
462 Py_DECREF(temp);
463 if (v == NULL)
464 goto onError;
465 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000466#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000467 if (!PyString_Check(v)) {
468 PyErr_Format(PyExc_TypeError,
469 "encoder did not return a string object (type=%.400s)",
470 v->ob_type->tp_name);
471 Py_DECREF(v);
472 goto onError;
473 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000476
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000477 onError:
478 return NULL;
479}
480
Guido van Rossum234f9421993-06-17 12:35:49 +0000481static void
Fred Drakeba096332000-07-09 07:04:36 +0000482string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000483{
Guido van Rossum9475a232001-10-05 20:51:39 +0000484 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000485}
486
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000487static int
488string_getsize(register PyObject *op)
489{
490 char *s;
491 int len;
492 if (PyString_AsStringAndSize(op, &s, &len))
493 return -1;
494 return len;
495}
496
497static /*const*/ char *
498string_getbuffer(register PyObject *op)
499{
500 char *s;
501 int len;
502 if (PyString_AsStringAndSize(op, &s, &len))
503 return NULL;
504 return s;
505}
506
Guido van Rossumd7047b31995-01-02 19:07:15 +0000507int
Fred Drakeba096332000-07-09 07:04:36 +0000508PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000510 if (!PyString_Check(op))
511 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000512 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513}
514
515/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000516PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518 if (!PyString_Check(op))
519 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000520 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521}
522
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000523int
524PyString_AsStringAndSize(register PyObject *obj,
525 register char **s,
526 register int *len)
527{
528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
532
533 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000534#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000535 if (PyUnicode_Check(obj)) {
536 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
537 if (obj == NULL)
538 return -1;
539 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000540 else
541#endif
542 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543 PyErr_Format(PyExc_TypeError,
544 "expected string or Unicode object, "
545 "%.200s found", obj->ob_type->tp_name);
546 return -1;
547 }
548 }
549
550 *s = PyString_AS_STRING(obj);
551 if (len != NULL)
552 *len = PyString_GET_SIZE(obj);
553 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
554 PyErr_SetString(PyExc_TypeError,
555 "expected string without null bytes");
556 return -1;
557 }
558 return 0;
559}
560
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000561/* Methods */
562
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000563static int
Fred Drakeba096332000-07-09 07:04:36 +0000564string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000565{
566 int i;
567 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000568 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000569
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000570 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000571 if (! PyString_CheckExact(op)) {
572 int ret;
573 /* A str subclass may have its own __str__ method. */
574 op = (PyStringObject *) PyObject_Str((PyObject *)op);
575 if (op == NULL)
576 return -1;
577 ret = string_print(op, fp, flags);
578 Py_DECREF(op);
579 return ret;
580 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000581 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000583 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000584 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000585
Thomas Wouters7e474022000-07-16 12:04:32 +0000586 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000587 quote = '\'';
588 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
589 quote = '"';
590
591 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000592 for (i = 0; i < op->ob_size; i++) {
593 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000594 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000595 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000596 else if (c == '\t')
597 fprintf(fp, "\\t");
598 else if (c == '\n')
599 fprintf(fp, "\\n");
600 else if (c == '\r')
601 fprintf(fp, "\\r");
602 else if (c < ' ' || c >= 0x7f)
603 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000604 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000605 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000607 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000608 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000609}
610
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000612string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000614 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
615 PyObject *v;
616 if (newsize > INT_MAX) {
617 PyErr_SetString(PyExc_OverflowError,
618 "string is too large to make repr");
619 }
620 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000622 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 }
624 else {
625 register int i;
626 register char c;
627 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000628 int quote;
629
Thomas Wouters7e474022000-07-16 12:04:32 +0000630 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000631 quote = '\'';
632 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
633 quote = '"';
634
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000635 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000636 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000637 for (i = 0; i < op->ob_size; i++) {
638 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000639 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000641 else if (c == '\t')
642 *p++ = '\\', *p++ = 't';
643 else if (c == '\n')
644 *p++ = '\\', *p++ = 'n';
645 else if (c == '\r')
646 *p++ = '\\', *p++ = 'r';
647 else if (c < ' ' || c >= 0x7f) {
648 sprintf(p, "\\x%02x", c & 0xff);
649 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 }
651 else
652 *p++ = c;
653 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000654 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000656 _PyString_Resize(
657 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000658 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000659 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000660}
661
Guido van Rossum189f1df2001-05-01 16:51:53 +0000662static PyObject *
663string_str(PyObject *s)
664{
Tim Petersc9933152001-10-16 20:18:24 +0000665 assert(PyString_Check(s));
666 if (PyString_CheckExact(s)) {
667 Py_INCREF(s);
668 return s;
669 }
670 else {
671 /* Subtype -- return genuine string with the same value. */
672 PyStringObject *t = (PyStringObject *) s;
673 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
674 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000675}
676
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000677static int
Fred Drakeba096332000-07-09 07:04:36 +0000678string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000679{
680 return a->ob_size;
681}
682
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000683static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000684string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000685{
686 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000687 register PyStringObject *op;
688 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000689#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000690 if (PyUnicode_Check(bb))
691 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000692#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000693 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000694 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000695 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696 return NULL;
697 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000700 if ((a->ob_size == 0 || b->ob_size == 0) &&
701 PyString_CheckExact(a) && PyString_CheckExact(b)) {
702 if (a->ob_size == 0) {
703 Py_INCREF(bb);
704 return bb;
705 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 Py_INCREF(a);
707 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000708 }
709 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000710 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000711 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000712 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000713 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000714 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000715 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000716#ifdef CACHE_HASH
717 op->ob_shash = -1;
718#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000719#ifdef INTERN_STRINGS
720 op->ob_sinterned = NULL;
721#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000722 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
723 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
724 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726#undef b
727}
728
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000730string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731{
732 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000733 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000734 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000735 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736 if (n < 0)
737 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000738 /* watch out for overflows: the size can overflow int,
739 * and the # of bytes needed can overflow size_t
740 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000742 if (n && size / n != a->ob_size) {
743 PyErr_SetString(PyExc_OverflowError,
744 "repeated string is too long");
745 return NULL;
746 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000747 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000748 Py_INCREF(a);
749 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000750 }
Tim Peters8f422462000-09-09 06:13:41 +0000751 nbytes = size * sizeof(char);
752 if (nbytes / sizeof(char) != (size_t)size ||
753 nbytes + sizeof(PyStringObject) <= nbytes) {
754 PyErr_SetString(PyExc_OverflowError,
755 "repeated string is too long");
756 return NULL;
757 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000758 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000759 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000760 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000761 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000762 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000763#ifdef CACHE_HASH
764 op->ob_shash = -1;
765#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000766#ifdef INTERN_STRINGS
767 op->ob_sinterned = NULL;
768#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000769 for (i = 0; i < size; i += a->ob_size)
770 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
771 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000772 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000773}
774
775/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
776
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000777static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000778string_slice(register PyStringObject *a, register int i, register int j)
779 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780{
781 if (i < 0)
782 i = 0;
783 if (j < 0)
784 j = 0; /* Avoid signed/unsigned bug in next line */
785 if (j > a->ob_size)
786 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000787 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
788 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000789 Py_INCREF(a);
790 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
792 if (j < i)
793 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000794 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795}
796
Guido van Rossum9284a572000-03-07 15:53:43 +0000797static int
Fred Drakeba096332000-07-09 07:04:36 +0000798string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000799{
800 register char *s, *end;
801 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000802#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000803 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000804 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000805#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000806 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000807 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000808 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 return -1;
810 }
811 c = PyString_AsString(el)[0];
812 s = PyString_AsString(a);
813 end = s + PyString_Size(a);
814 while (s < end) {
815 if (c == *s++)
816 return 1;
817 }
818 return 0;
819}
820
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000821static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000822string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000824 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000825 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000827 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 return NULL;
829 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000830 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000831 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 if (v == NULL)
833 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000834 else {
835#ifdef COUNT_ALLOCS
836 one_strings++;
837#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000838 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000839 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000840 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841}
842
Martin v. Löwiscd353062001-05-24 16:56:35 +0000843static PyObject*
844string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000846 int c;
847 int len_a, len_b;
848 int min_len;
849 PyObject *result;
850
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000851 /* Make sure both arguments are strings. */
852 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000853 result = Py_NotImplemented;
854 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000855 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000856 if (a == b) {
857 switch (op) {
858 case Py_EQ:case Py_LE:case Py_GE:
859 result = Py_True;
860 goto out;
861 case Py_NE:case Py_LT:case Py_GT:
862 result = Py_False;
863 goto out;
864 }
865 }
866 if (op == Py_EQ) {
867 /* Supporting Py_NE here as well does not save
868 much time, since Py_NE is rarely used. */
869 if (a->ob_size == b->ob_size
870 && (a->ob_sval[0] == b->ob_sval[0]
871 && memcmp(a->ob_sval, b->ob_sval,
872 a->ob_size) == 0)) {
873 result = Py_True;
874 } else {
875 result = Py_False;
876 }
877 goto out;
878 }
879 len_a = a->ob_size; len_b = b->ob_size;
880 min_len = (len_a < len_b) ? len_a : len_b;
881 if (min_len > 0) {
882 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
883 if (c==0)
884 c = memcmp(a->ob_sval, b->ob_sval, min_len);
885 }else
886 c = 0;
887 if (c == 0)
888 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
889 switch (op) {
890 case Py_LT: c = c < 0; break;
891 case Py_LE: c = c <= 0; break;
892 case Py_EQ: assert(0); break; /* unreachable */
893 case Py_NE: c = c != 0; break;
894 case Py_GT: c = c > 0; break;
895 case Py_GE: c = c >= 0; break;
896 default:
897 result = Py_NotImplemented;
898 goto out;
899 }
900 result = c ? Py_True : Py_False;
901 out:
902 Py_INCREF(result);
903 return result;
904}
905
906int
907_PyString_Eq(PyObject *o1, PyObject *o2)
908{
909 PyStringObject *a, *b;
910 a = (PyStringObject*)o1;
911 b = (PyStringObject*)o2;
912 return a->ob_size == b->ob_size
913 && *a->ob_sval == *b->ob_sval
914 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915}
916
Guido van Rossum9bfef441993-03-29 10:43:31 +0000917static long
Fred Drakeba096332000-07-09 07:04:36 +0000918string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000920 register int len;
921 register unsigned char *p;
922 register long x;
923
924#ifdef CACHE_HASH
925 if (a->ob_shash != -1)
926 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000927#ifdef INTERN_STRINGS
928 if (a->ob_sinterned != NULL)
929 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000931#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000932#endif
933 len = a->ob_size;
934 p = (unsigned char *) a->ob_sval;
935 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000936 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000937 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000938 x ^= a->ob_size;
939 if (x == -1)
940 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000941#ifdef CACHE_HASH
942 a->ob_shash = x;
943#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000944 return x;
945}
946
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000947static int
Fred Drakeba096332000-07-09 07:04:36 +0000948string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000949{
950 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000951 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000952 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000953 return -1;
954 }
955 *ptr = (void *)self->ob_sval;
956 return self->ob_size;
957}
958
959static int
Fred Drakeba096332000-07-09 07:04:36 +0000960string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000961{
Guido van Rossum045e6881997-09-08 18:30:11 +0000962 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000963 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000964 return -1;
965}
966
967static int
Fred Drakeba096332000-07-09 07:04:36 +0000968string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000969{
970 if ( lenp )
971 *lenp = self->ob_size;
972 return 1;
973}
974
Guido van Rossum1db70701998-10-08 02:18:52 +0000975static int
Fred Drakeba096332000-07-09 07:04:36 +0000976string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000977{
978 if ( index != 0 ) {
979 PyErr_SetString(PyExc_SystemError,
980 "accessing non-existent string segment");
981 return -1;
982 }
983 *ptr = self->ob_sval;
984 return self->ob_size;
985}
986
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000988 (inquiry)string_length, /*sq_length*/
989 (binaryfunc)string_concat, /*sq_concat*/
990 (intargfunc)string_repeat, /*sq_repeat*/
991 (intargfunc)string_item, /*sq_item*/
992 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000993 0, /*sq_ass_item*/
994 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000995 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996};
997
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000998static PyBufferProcs string_as_buffer = {
999 (getreadbufferproc)string_buffer_getreadbuf,
1000 (getwritebufferproc)string_buffer_getwritebuf,
1001 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001002 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001003};
1004
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001005
1006
/* Mode selectors for do_strip(): which end(s) of the string to trim. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2
1010
1011
1012static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001013split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001014{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001015 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016 PyObject* item;
1017 PyObject *list = PyList_New(0);
1018
1019 if (list == NULL)
1020 return NULL;
1021
Guido van Rossum4c08d552000-03-10 22:55:18 +00001022 for (i = j = 0; i < len; ) {
1023 while (i < len && isspace(Py_CHARMASK(s[i])))
1024 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001025 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001026 while (i < len && !isspace(Py_CHARMASK(s[i])))
1027 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001029 if (maxsplit-- <= 0)
1030 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001031 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1032 if (item == NULL)
1033 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001034 err = PyList_Append(list, item);
1035 Py_DECREF(item);
1036 if (err < 0)
1037 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001038 while (i < len && isspace(Py_CHARMASK(s[i])))
1039 i++;
1040 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041 }
1042 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001043 if (j < len) {
1044 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1045 if (item == NULL)
1046 goto finally;
1047 err = PyList_Append(list, item);
1048 Py_DECREF(item);
1049 if (err < 0)
1050 goto finally;
1051 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return list;
1053 finally:
1054 Py_DECREF(list);
1055 return NULL;
1056}
1057
1058
1059static char split__doc__[] =
1060"S.split([sep [,maxsplit]]) -> list of strings\n\
1061\n\
1062Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001063delimiter string. If maxsplit is given, at most maxsplit\n\
1064splits are done. If sep is not specified, any whitespace string\n\
1065is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001066
1067static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001068string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001069{
1070 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001071 int maxsplit = -1;
1072 const char *s = PyString_AS_STRING(self), *sub;
1073 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001074
Guido van Rossum4c08d552000-03-10 22:55:18 +00001075 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001076 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001077 if (maxsplit < 0)
1078 maxsplit = INT_MAX;
1079 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001081 if (PyString_Check(subobj)) {
1082 sub = PyString_AS_STRING(subobj);
1083 n = PyString_GET_SIZE(subobj);
1084 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001085#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001086 else if (PyUnicode_Check(subobj))
1087 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001088#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1090 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001091 if (n == 0) {
1092 PyErr_SetString(PyExc_ValueError, "empty separator");
1093 return NULL;
1094 }
1095
1096 list = PyList_New(0);
1097 if (list == NULL)
1098 return NULL;
1099
1100 i = j = 0;
1101 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001102 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001103 if (maxsplit-- <= 0)
1104 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001105 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1106 if (item == NULL)
1107 goto fail;
1108 err = PyList_Append(list, item);
1109 Py_DECREF(item);
1110 if (err < 0)
1111 goto fail;
1112 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 }
1114 else
1115 i++;
1116 }
1117 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1118 if (item == NULL)
1119 goto fail;
1120 err = PyList_Append(list, item);
1121 Py_DECREF(item);
1122 if (err < 0)
1123 goto fail;
1124
1125 return list;
1126
1127 fail:
1128 Py_DECREF(list);
1129 return NULL;
1130}
1131
1132
1133static char join__doc__[] =
1134"S.join(sequence) -> string\n\
1135\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001136Return a string which is the concatenation of the strings in the\n\
1137sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138
1139static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001140string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141{
1142 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001143 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001144 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001145 char *p;
1146 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001147 size_t sz = 0;
1148 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001149 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001150
Tim Peters19fe14e2001-01-19 03:03:47 +00001151 seq = PySequence_Fast(orig, "");
1152 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001153 if (PyErr_ExceptionMatches(PyExc_TypeError))
1154 PyErr_Format(PyExc_TypeError,
1155 "sequence expected, %.80s found",
1156 orig->ob_type->tp_name);
1157 return NULL;
1158 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001159
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001160 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001161 if (seqlen == 0) {
1162 Py_DECREF(seq);
1163 return PyString_FromString("");
1164 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001166 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001167 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1168 PyErr_Format(PyExc_TypeError,
1169 "sequence item 0: expected string,"
1170 " %.80s found",
1171 item->ob_type->tp_name);
1172 Py_DECREF(seq);
1173 return NULL;
1174 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001175 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001176 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001177 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001178 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001179
Tim Peters19fe14e2001-01-19 03:03:47 +00001180 /* There are at least two things to join. Do a pre-pass to figure out
1181 * the total amount of space we'll need (sz), see whether any argument
1182 * is absurd, and defer to the Unicode join if appropriate.
1183 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001184 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001185 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001186 item = PySequence_Fast_GET_ITEM(seq, i);
1187 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001188#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001189 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001190 /* Defer to Unicode join.
1191 * CAUTION: There's no gurantee that the
1192 * original sequence can be iterated over
1193 * again, so we must pass seq here.
1194 */
1195 PyObject *result;
1196 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001197 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001198 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001199 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001200#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001201 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001202 "sequence item %i: expected string,"
1203 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001204 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001205 Py_DECREF(seq);
1206 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001207 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001208 sz += PyString_GET_SIZE(item);
1209 if (i != 0)
1210 sz += seplen;
1211 if (sz < old_sz || sz > INT_MAX) {
1212 PyErr_SetString(PyExc_OverflowError,
1213 "join() is too long for a Python string");
1214 Py_DECREF(seq);
1215 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001217 }
1218
1219 /* Allocate result space. */
1220 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1221 if (res == NULL) {
1222 Py_DECREF(seq);
1223 return NULL;
1224 }
1225
1226 /* Catenate everything. */
1227 p = PyString_AS_STRING(res);
1228 for (i = 0; i < seqlen; ++i) {
1229 size_t n;
1230 item = PySequence_Fast_GET_ITEM(seq, i);
1231 n = PyString_GET_SIZE(item);
1232 memcpy(p, PyString_AS_STRING(item), n);
1233 p += n;
1234 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001235 memcpy(p, sep, seplen);
1236 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001237 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001238 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001239
Jeremy Hylton49048292000-07-11 03:28:17 +00001240 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001241 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242}
1243
Tim Peters52e155e2001-06-16 05:42:57 +00001244PyObject *
1245_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001246{
Tim Petersa7259592001-06-16 05:11:17 +00001247 assert(sep != NULL && PyString_Check(sep));
1248 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001249 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001250}
1251
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001252static long
Fred Drakeba096332000-07-09 07:04:36 +00001253string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001255 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001256 int len = PyString_GET_SIZE(self);
1257 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001258 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001259
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001260 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001261 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001262 return -2;
1263 if (PyString_Check(subobj)) {
1264 sub = PyString_AS_STRING(subobj);
1265 n = PyString_GET_SIZE(subobj);
1266 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001267#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268 else if (PyUnicode_Check(subobj))
1269 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001270#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001271 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272 return -2;
1273
1274 if (last > len)
1275 last = len;
1276 if (last < 0)
1277 last += len;
1278 if (last < 0)
1279 last = 0;
1280 if (i < 0)
1281 i += len;
1282 if (i < 0)
1283 i = 0;
1284
Guido van Rossum4c08d552000-03-10 22:55:18 +00001285 if (dir > 0) {
1286 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001287 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001288 last -= n;
1289 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001290 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001291 return (long)i;
1292 }
1293 else {
1294 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001295
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 if (n == 0 && i <= last)
1297 return (long)last;
1298 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001299 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 return (long)j;
1301 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001302
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303 return -1;
1304}
1305
1306
1307static char find__doc__[] =
1308"S.find(sub [,start [,end]]) -> int\n\
1309\n\
1310Return the lowest index in S where substring sub is found,\n\
1311such that sub is contained within s[start,end]. Optional\n\
1312arguments start and end are interpreted as in slice notation.\n\
1313\n\
1314Return -1 on failure.";
1315
1316static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001317string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320 if (result == -2)
1321 return NULL;
1322 return PyInt_FromLong(result);
1323}
1324
1325
1326static char index__doc__[] =
1327"S.index(sub [,start [,end]]) -> int\n\
1328\n\
1329Like S.find() but raise ValueError when the substring is not found.";
1330
1331static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001332string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001334 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 if (result == -2)
1336 return NULL;
1337 if (result == -1) {
1338 PyErr_SetString(PyExc_ValueError,
1339 "substring not found in string.index");
1340 return NULL;
1341 }
1342 return PyInt_FromLong(result);
1343}
1344
1345
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346static char rfind__doc__[] =
1347"S.rfind(sub [,start [,end]]) -> int\n\
1348\n\
1349Return the highest index in S where substring sub is found,\n\
1350such that sub is contained within s[start,end]. Optional\n\
1351arguments start and end are interpreted as in slice notation.\n\
1352\n\
1353Return -1 on failure.";
1354
1355static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001356string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001359 if (result == -2)
1360 return NULL;
1361 return PyInt_FromLong(result);
1362}
1363
1364
1365static char rindex__doc__[] =
1366"S.rindex(sub [,start [,end]]) -> int\n\
1367\n\
1368Like S.rfind() but raise ValueError when the substring is not found.";
1369
1370static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001371string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001373 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374 if (result == -2)
1375 return NULL;
1376 if (result == -1) {
1377 PyErr_SetString(PyExc_ValueError,
1378 "substring not found in string.rindex");
1379 return NULL;
1380 }
1381 return PyInt_FromLong(result);
1382}
1383
1384
1385static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001386do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387{
1388 char *s = PyString_AS_STRING(self);
1389 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 i = 0;
1392 if (striptype != RIGHTSTRIP) {
1393 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1394 i++;
1395 }
1396 }
1397
1398 j = len;
1399 if (striptype != LEFTSTRIP) {
1400 do {
1401 j--;
1402 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1403 j++;
1404 }
1405
Tim Peters8fa5dd02001-09-12 02:18:30 +00001406 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407 Py_INCREF(self);
1408 return (PyObject*)self;
1409 }
1410 else
1411 return PyString_FromStringAndSize(s+i, j-i);
1412}
1413
1414
1415static char strip__doc__[] =
1416"S.strip() -> string\n\
1417\n\
1418Return a copy of the string S with leading and trailing\n\
1419whitespace removed.";
1420
1421static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001422string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001424 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425}
1426
1427
1428static char lstrip__doc__[] =
1429"S.lstrip() -> string\n\
1430\n\
1431Return a copy of the string S with leading whitespace removed.";
1432
1433static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001434string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001436 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437}
1438
1439
1440static char rstrip__doc__[] =
1441"S.rstrip() -> string\n\
1442\n\
1443Return a copy of the string S with trailing whitespace removed.";
1444
1445static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001446string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001448 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449}
1450
1451
1452static char lower__doc__[] =
1453"S.lower() -> string\n\
1454\n\
1455Return a copy of the string S converted to lowercase.";
1456
1457static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001458string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459{
1460 char *s = PyString_AS_STRING(self), *s_new;
1461 int i, n = PyString_GET_SIZE(self);
1462 PyObject *new;
1463
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001464 new = PyString_FromStringAndSize(NULL, n);
1465 if (new == NULL)
1466 return NULL;
1467 s_new = PyString_AsString(new);
1468 for (i = 0; i < n; i++) {
1469 int c = Py_CHARMASK(*s++);
1470 if (isupper(c)) {
1471 *s_new = tolower(c);
1472 } else
1473 *s_new = c;
1474 s_new++;
1475 }
1476 return new;
1477}
1478
1479
1480static char upper__doc__[] =
1481"S.upper() -> string\n\
1482\n\
1483Return a copy of the string S converted to uppercase.";
1484
1485static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001486string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487{
1488 char *s = PyString_AS_STRING(self), *s_new;
1489 int i, n = PyString_GET_SIZE(self);
1490 PyObject *new;
1491
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 new = PyString_FromStringAndSize(NULL, n);
1493 if (new == NULL)
1494 return NULL;
1495 s_new = PyString_AsString(new);
1496 for (i = 0; i < n; i++) {
1497 int c = Py_CHARMASK(*s++);
1498 if (islower(c)) {
1499 *s_new = toupper(c);
1500 } else
1501 *s_new = c;
1502 s_new++;
1503 }
1504 return new;
1505}
1506
1507
Guido van Rossum4c08d552000-03-10 22:55:18 +00001508static char title__doc__[] =
1509"S.title() -> string\n\
1510\n\
1511Return a titlecased version of S, i.e. words start with uppercase\n\
1512characters, all remaining cased characters have lowercase.";
1513
1514static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001515string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001516{
1517 char *s = PyString_AS_STRING(self), *s_new;
1518 int i, n = PyString_GET_SIZE(self);
1519 int previous_is_cased = 0;
1520 PyObject *new;
1521
Guido van Rossum4c08d552000-03-10 22:55:18 +00001522 new = PyString_FromStringAndSize(NULL, n);
1523 if (new == NULL)
1524 return NULL;
1525 s_new = PyString_AsString(new);
1526 for (i = 0; i < n; i++) {
1527 int c = Py_CHARMASK(*s++);
1528 if (islower(c)) {
1529 if (!previous_is_cased)
1530 c = toupper(c);
1531 previous_is_cased = 1;
1532 } else if (isupper(c)) {
1533 if (previous_is_cased)
1534 c = tolower(c);
1535 previous_is_cased = 1;
1536 } else
1537 previous_is_cased = 0;
1538 *s_new++ = c;
1539 }
1540 return new;
1541}
1542
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001543static char capitalize__doc__[] =
1544"S.capitalize() -> string\n\
1545\n\
1546Return a copy of the string S with only its first character\n\
1547capitalized.";
1548
1549static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001550string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551{
1552 char *s = PyString_AS_STRING(self), *s_new;
1553 int i, n = PyString_GET_SIZE(self);
1554 PyObject *new;
1555
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556 new = PyString_FromStringAndSize(NULL, n);
1557 if (new == NULL)
1558 return NULL;
1559 s_new = PyString_AsString(new);
1560 if (0 < n) {
1561 int c = Py_CHARMASK(*s++);
1562 if (islower(c))
1563 *s_new = toupper(c);
1564 else
1565 *s_new = c;
1566 s_new++;
1567 }
1568 for (i = 1; i < n; i++) {
1569 int c = Py_CHARMASK(*s++);
1570 if (isupper(c))
1571 *s_new = tolower(c);
1572 else
1573 *s_new = c;
1574 s_new++;
1575 }
1576 return new;
1577}
1578
1579
1580static char count__doc__[] =
1581"S.count(sub[, start[, end]]) -> int\n\
1582\n\
1583Return the number of occurrences of substring sub in string\n\
1584S[start:end]. Optional arguments start and end are\n\
1585interpreted as in slice notation.";
1586
1587static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001588string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001590 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591 int len = PyString_GET_SIZE(self), n;
1592 int i = 0, last = INT_MAX;
1593 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001594 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595
Guido van Rossumc6821402000-05-08 14:08:05 +00001596 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1597 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001599
Guido van Rossum4c08d552000-03-10 22:55:18 +00001600 if (PyString_Check(subobj)) {
1601 sub = PyString_AS_STRING(subobj);
1602 n = PyString_GET_SIZE(subobj);
1603 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001604#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001605 else if (PyUnicode_Check(subobj)) {
1606 int count;
1607 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1608 if (count == -1)
1609 return NULL;
1610 else
1611 return PyInt_FromLong((long) count);
1612 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001613#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1615 return NULL;
1616
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 if (last > len)
1618 last = len;
1619 if (last < 0)
1620 last += len;
1621 if (last < 0)
1622 last = 0;
1623 if (i < 0)
1624 i += len;
1625 if (i < 0)
1626 i = 0;
1627 m = last + 1 - n;
1628 if (n == 0)
1629 return PyInt_FromLong((long) (m-i));
1630
1631 r = 0;
1632 while (i < m) {
1633 if (!memcmp(s+i, sub, n)) {
1634 r++;
1635 i += n;
1636 } else {
1637 i++;
1638 }
1639 }
1640 return PyInt_FromLong((long) r);
1641}
1642
1643
1644static char swapcase__doc__[] =
1645"S.swapcase() -> string\n\
1646\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001647Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648converted to lowercase and vice versa.";
1649
1650static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001651string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001652{
1653 char *s = PyString_AS_STRING(self), *s_new;
1654 int i, n = PyString_GET_SIZE(self);
1655 PyObject *new;
1656
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001657 new = PyString_FromStringAndSize(NULL, n);
1658 if (new == NULL)
1659 return NULL;
1660 s_new = PyString_AsString(new);
1661 for (i = 0; i < n; i++) {
1662 int c = Py_CHARMASK(*s++);
1663 if (islower(c)) {
1664 *s_new = toupper(c);
1665 }
1666 else if (isupper(c)) {
1667 *s_new = tolower(c);
1668 }
1669 else
1670 *s_new = c;
1671 s_new++;
1672 }
1673 return new;
1674}
1675
1676
1677static char translate__doc__[] =
1678"S.translate(table [,deletechars]) -> string\n\
1679\n\
1680Return a copy of the string S, where all characters occurring\n\
1681in the optional argument deletechars are removed, and the\n\
1682remaining characters have been mapped through the given\n\
1683translation table, which must be a string of length 256.";
1684
1685static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001686string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 register char *input, *output;
1689 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690 register int i, c, changed = 0;
1691 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001692 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001693 int inlen, tablen, dellen = 0;
1694 PyObject *result;
1695 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001696 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001697
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 if (!PyArg_ParseTuple(args, "O|O:translate",
1699 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701
1702 if (PyString_Check(tableobj)) {
1703 table1 = PyString_AS_STRING(tableobj);
1704 tablen = PyString_GET_SIZE(tableobj);
1705 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001706#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001707 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001708 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001709 parameter; instead a mapping to None will cause characters
1710 to be deleted. */
1711 if (delobj != NULL) {
1712 PyErr_SetString(PyExc_TypeError,
1713 "deletions are implemented differently for unicode");
1714 return NULL;
1715 }
1716 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1717 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001718#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001719 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001721
1722 if (delobj != NULL) {
1723 if (PyString_Check(delobj)) {
1724 del_table = PyString_AS_STRING(delobj);
1725 dellen = PyString_GET_SIZE(delobj);
1726 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001727#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001728 else if (PyUnicode_Check(delobj)) {
1729 PyErr_SetString(PyExc_TypeError,
1730 "deletions are implemented differently for unicode");
1731 return NULL;
1732 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001733#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1735 return NULL;
1736
1737 if (tablen != 256) {
1738 PyErr_SetString(PyExc_ValueError,
1739 "translation table must be 256 characters long");
1740 return NULL;
1741 }
1742 }
1743 else {
1744 del_table = NULL;
1745 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746 }
1747
1748 table = table1;
1749 inlen = PyString_Size(input_obj);
1750 result = PyString_FromStringAndSize((char *)NULL, inlen);
1751 if (result == NULL)
1752 return NULL;
1753 output_start = output = PyString_AsString(result);
1754 input = PyString_AsString(input_obj);
1755
1756 if (dellen == 0) {
1757 /* If no deletions are required, use faster code */
1758 for (i = inlen; --i >= 0; ) {
1759 c = Py_CHARMASK(*input++);
1760 if (Py_CHARMASK((*output++ = table[c])) != c)
1761 changed = 1;
1762 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001763 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764 return result;
1765 Py_DECREF(result);
1766 Py_INCREF(input_obj);
1767 return input_obj;
1768 }
1769
1770 for (i = 0; i < 256; i++)
1771 trans_table[i] = Py_CHARMASK(table[i]);
1772
1773 for (i = 0; i < dellen; i++)
1774 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1775
1776 for (i = inlen; --i >= 0; ) {
1777 c = Py_CHARMASK(*input++);
1778 if (trans_table[c] != -1)
1779 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1780 continue;
1781 changed = 1;
1782 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001783 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784 Py_DECREF(result);
1785 Py_INCREF(input_obj);
1786 return input_obj;
1787 }
1788 /* Fix the size of the resulting string */
1789 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1790 return NULL;
1791 return result;
1792}
1793
1794
1795/* What follows is used for implementing replace(). Perry Stoll. */
1796
/*
  mymemfind

  A strstr() work-alike for arbitrary blocks of memory (no NUL
  terminator is needed or honored).

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  when there is none.  A PAT longer than MEM always yields -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin within the final pat_len-1 bytes of MEM. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1822
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  left to right.  For example mem=1111 with pat=11 gives 2, and
  mem=11111 with pat=11 gives 2 as well.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    for (; len >= 0; hits++) {
        const int at = mymemfind(mem, len, pat, pat_len);

        if (at == -1)
            break;
        /* Resume just past this match so that matches never overlap. */
        mem += at + pat_len;
        len -= at + pat_len;
    }
    return hits;
}
1846
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR, or STR is empty, or
   there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here and returned; the caller must release it with PyMem_FREE().

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".  PAT must not be empty.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would not
       fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,         /* input string */
             const char *pat, int pat_len,     /* pattern string to find */
             const char *sub, int sub_len,     /* substitution string */
             int count,                        /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by delta.  Only a growing
       result can overflow; refuse it before doing the arithmetic, since
       a wrapped new_len would under-allocate the output buffer. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1931
1932
1933static char replace__doc__[] =
1934"S.replace (old, new[, maxsplit]) -> string\n\
1935\n\
1936Return a copy of string S with all occurrences of substring\n\
1937old replaced by new. If the optional argument maxsplit is\n\
1938given, only the first maxsplit occurrences are replaced.";
1939
1940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001941string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 const char *str = PyString_AS_STRING(self), *sub, *repl;
1944 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001945 const int len = PyString_GET_SIZE(self);
1946 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001947 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950
Guido van Rossum4c08d552000-03-10 22:55:18 +00001951 if (!PyArg_ParseTuple(args, "OO|i:replace",
1952 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001954
1955 if (PyString_Check(subobj)) {
1956 sub = PyString_AS_STRING(subobj);
1957 sub_len = PyString_GET_SIZE(subobj);
1958 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001959#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001960 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001961 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001963#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001964 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1965 return NULL;
1966
1967 if (PyString_Check(replobj)) {
1968 repl = PyString_AS_STRING(replobj);
1969 repl_len = PyString_GET_SIZE(replobj);
1970 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001971#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001972 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001973 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001974 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001975#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001976 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1977 return NULL;
1978
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001979 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001980 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981 return NULL;
1982 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001983 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984 if (new_s == NULL) {
1985 PyErr_NoMemory();
1986 return NULL;
1987 }
1988 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001989 if (PyString_CheckExact(self)) {
1990 /* we're returning another reference to self */
1991 new = (PyObject*)self;
1992 Py_INCREF(new);
1993 }
1994 else {
1995 new = PyString_FromStringAndSize(str, len);
1996 if (new == NULL)
1997 return NULL;
1998 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999 }
2000 else {
2001 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002002 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003 }
2004 return new;
2005}
2006
2007
2008static char startswith__doc__[] =
2009"S.startswith(prefix[, start[, end]]) -> int\n\
2010\n\
2011Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2012optional start, test S beginning at that position. With optional end, stop\n\
2013comparing S at that position.";
2014
2015static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002016string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002018 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002020 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 int plen;
2022 int start = 0;
2023 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002024 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025
Guido van Rossumc6821402000-05-08 14:08:05 +00002026 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2027 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002028 return NULL;
2029 if (PyString_Check(subobj)) {
2030 prefix = PyString_AS_STRING(subobj);
2031 plen = PyString_GET_SIZE(subobj);
2032 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002033#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002034 else if (PyUnicode_Check(subobj)) {
2035 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002036 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002037 subobj, start, end, -1);
2038 if (rc == -1)
2039 return NULL;
2040 else
2041 return PyInt_FromLong((long) rc);
2042 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002043#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 return NULL;
2046
2047 /* adopt Java semantics for index out of range. it is legal for
2048 * offset to be == plen, but this only returns true if prefix is
2049 * the empty string.
2050 */
2051 if (start < 0 || start+plen > len)
2052 return PyInt_FromLong(0);
2053
2054 if (!memcmp(str+start, prefix, plen)) {
2055 /* did the match end after the specified end? */
2056 if (end < 0)
2057 return PyInt_FromLong(1);
2058 else if (end - start < plen)
2059 return PyInt_FromLong(0);
2060 else
2061 return PyInt_FromLong(1);
2062 }
2063 else return PyInt_FromLong(0);
2064}
2065
2066
2067static char endswith__doc__[] =
2068"S.endswith(suffix[, start[, end]]) -> int\n\
2069\n\
2070Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2071optional start, test S beginning at that position. With optional end, stop\n\
2072comparing S at that position.";
2073
2074static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002075string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002077 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002079 const char* suffix;
2080 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081 int start = 0;
2082 int end = -1;
2083 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002084 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085
Guido van Rossumc6821402000-05-08 14:08:05 +00002086 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2087 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 return NULL;
2089 if (PyString_Check(subobj)) {
2090 suffix = PyString_AS_STRING(subobj);
2091 slen = PyString_GET_SIZE(subobj);
2092 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002093#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002094 else if (PyUnicode_Check(subobj)) {
2095 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002096 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002097 subobj, start, end, +1);
2098 if (rc == -1)
2099 return NULL;
2100 else
2101 return PyInt_FromLong((long) rc);
2102 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002103#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105 return NULL;
2106
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 return PyInt_FromLong(0);
2109
2110 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114 return PyInt_FromLong(1);
2115 else return PyInt_FromLong(0);
2116}
2117
2118
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002119static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002120"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002121\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002122Encodes S using the codec registered for encoding. encoding defaults\n\
2123to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002124handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2125a ValueError. Other possible values are 'ignore' and 'replace'.";
2126
2127static PyObject *
2128string_encode(PyStringObject *self, PyObject *args)
2129{
2130 char *encoding = NULL;
2131 char *errors = NULL;
2132 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2133 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002134 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2135}
2136
2137
2138static char decode__doc__[] =
2139"S.decode([encoding[,errors]]) -> object\n\
2140\n\
2141Decodes S using the codec registered for encoding. encoding defaults\n\
2142to the default encoding. errors may be given to set a different error\n\
2143handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2144a ValueError. Other possible values are 'ignore' and 'replace'.";
2145
2146static PyObject *
2147string_decode(PyStringObject *self, PyObject *args)
2148{
2149 char *encoding = NULL;
2150 char *errors = NULL;
2151 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2152 return NULL;
2153 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002154}
2155
2156
Guido van Rossum4c08d552000-03-10 22:55:18 +00002157static char expandtabs__doc__[] =
2158"S.expandtabs([tabsize]) -> string\n\
2159\n\
2160Return a copy of S where all tab characters are expanded using spaces.\n\
2161If tabsize is not given, a tab size of 8 characters is assumed.";
2162
2163static PyObject*
2164string_expandtabs(PyStringObject *self, PyObject *args)
2165{
2166 const char *e, *p;
2167 char *q;
2168 int i, j;
2169 PyObject *u;
2170 int tabsize = 8;
2171
2172 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2173 return NULL;
2174
Thomas Wouters7e474022000-07-16 12:04:32 +00002175 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 i = j = 0;
2177 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2178 for (p = PyString_AS_STRING(self); p < e; p++)
2179 if (*p == '\t') {
2180 if (tabsize > 0)
2181 j += tabsize - (j % tabsize);
2182 }
2183 else {
2184 j++;
2185 if (*p == '\n' || *p == '\r') {
2186 i += j;
2187 j = 0;
2188 }
2189 }
2190
2191 /* Second pass: create output string and fill it */
2192 u = PyString_FromStringAndSize(NULL, i + j);
2193 if (!u)
2194 return NULL;
2195
2196 j = 0;
2197 q = PyString_AS_STRING(u);
2198
2199 for (p = PyString_AS_STRING(self); p < e; p++)
2200 if (*p == '\t') {
2201 if (tabsize > 0) {
2202 i = tabsize - (j % tabsize);
2203 j += i;
2204 while (i--)
2205 *q++ = ' ';
2206 }
2207 }
2208 else {
2209 j++;
2210 *q++ = *p;
2211 if (*p == '\n' || *p == '\r')
2212 j = 0;
2213 }
2214
2215 return u;
2216}
2217
Tim Peters8fa5dd02001-09-12 02:18:30 +00002218static PyObject *
2219pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220{
2221 PyObject *u;
2222
2223 if (left < 0)
2224 left = 0;
2225 if (right < 0)
2226 right = 0;
2227
Tim Peters8fa5dd02001-09-12 02:18:30 +00002228 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002229 Py_INCREF(self);
2230 return (PyObject *)self;
2231 }
2232
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002233 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002234 left + PyString_GET_SIZE(self) + right);
2235 if (u) {
2236 if (left)
2237 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002238 memcpy(PyString_AS_STRING(u) + left,
2239 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002240 PyString_GET_SIZE(self));
2241 if (right)
2242 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2243 fill, right);
2244 }
2245
2246 return u;
2247}
2248
2249static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002250"S.ljust(width) -> string\n"
2251"\n"
2252"Return S left justified in a string of length width. Padding is\n"
2253"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254
2255static PyObject *
2256string_ljust(PyStringObject *self, PyObject *args)
2257{
2258 int width;
2259 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2260 return NULL;
2261
Tim Peters8fa5dd02001-09-12 02:18:30 +00002262 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263 Py_INCREF(self);
2264 return (PyObject*) self;
2265 }
2266
2267 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2268}
2269
2270
2271static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002272"S.rjust(width) -> string\n"
2273"\n"
2274"Return S right justified in a string of length width. Padding is\n"
2275"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276
2277static PyObject *
2278string_rjust(PyStringObject *self, PyObject *args)
2279{
2280 int width;
2281 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2282 return NULL;
2283
Tim Peters8fa5dd02001-09-12 02:18:30 +00002284 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002285 Py_INCREF(self);
2286 return (PyObject*) self;
2287 }
2288
2289 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2290}
2291
2292
2293static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002294"S.center(width) -> string\n"
2295"\n"
2296"Return S centered in a string of length width. Padding is done\n"
2297"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298
2299static PyObject *
2300string_center(PyStringObject *self, PyObject *args)
2301{
2302 int marg, left;
2303 int width;
2304
2305 if (!PyArg_ParseTuple(args, "i:center", &width))
2306 return NULL;
2307
Tim Peters8fa5dd02001-09-12 02:18:30 +00002308 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 Py_INCREF(self);
2310 return (PyObject*) self;
2311 }
2312
2313 marg = width - PyString_GET_SIZE(self);
2314 left = marg / 2 + (marg & width & 1);
2315
2316 return pad(self, left, marg - left, ' ');
2317}
2318
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002320"S.isspace() -> int\n"
2321"\n"
2322"Return 1 if there are only whitespace characters in S,\n"
2323"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324
2325static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002326string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327{
Fred Drakeba096332000-07-09 07:04:36 +00002328 register const unsigned char *p
2329 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002330 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331
Guido van Rossum4c08d552000-03-10 22:55:18 +00002332 /* Shortcut for single character strings */
2333 if (PyString_GET_SIZE(self) == 1 &&
2334 isspace(*p))
2335 return PyInt_FromLong(1);
2336
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002337 /* Special case for empty strings */
2338 if (PyString_GET_SIZE(self) == 0)
2339 return PyInt_FromLong(0);
2340
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341 e = p + PyString_GET_SIZE(self);
2342 for (; p < e; p++) {
2343 if (!isspace(*p))
2344 return PyInt_FromLong(0);
2345 }
2346 return PyInt_FromLong(1);
2347}
2348
2349
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002350static char isalpha__doc__[] =
2351"S.isalpha() -> int\n\
2352\n\
2353Return 1 if all characters in S are alphabetic\n\
2354and there is at least one character in S, 0 otherwise.";
2355
2356static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002357string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002358{
Fred Drakeba096332000-07-09 07:04:36 +00002359 register const unsigned char *p
2360 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002361 register const unsigned char *e;
2362
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002363 /* Shortcut for single character strings */
2364 if (PyString_GET_SIZE(self) == 1 &&
2365 isalpha(*p))
2366 return PyInt_FromLong(1);
2367
2368 /* Special case for empty strings */
2369 if (PyString_GET_SIZE(self) == 0)
2370 return PyInt_FromLong(0);
2371
2372 e = p + PyString_GET_SIZE(self);
2373 for (; p < e; p++) {
2374 if (!isalpha(*p))
2375 return PyInt_FromLong(0);
2376 }
2377 return PyInt_FromLong(1);
2378}
2379
2380
2381static char isalnum__doc__[] =
2382"S.isalnum() -> int\n\
2383\n\
2384Return 1 if all characters in S are alphanumeric\n\
2385and there is at least one character in S, 0 otherwise.";
2386
2387static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002388string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002389{
Fred Drakeba096332000-07-09 07:04:36 +00002390 register const unsigned char *p
2391 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002392 register const unsigned char *e;
2393
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002394 /* Shortcut for single character strings */
2395 if (PyString_GET_SIZE(self) == 1 &&
2396 isalnum(*p))
2397 return PyInt_FromLong(1);
2398
2399 /* Special case for empty strings */
2400 if (PyString_GET_SIZE(self) == 0)
2401 return PyInt_FromLong(0);
2402
2403 e = p + PyString_GET_SIZE(self);
2404 for (; p < e; p++) {
2405 if (!isalnum(*p))
2406 return PyInt_FromLong(0);
2407 }
2408 return PyInt_FromLong(1);
2409}
2410
2411
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412static char isdigit__doc__[] =
2413"S.isdigit() -> int\n\
2414\n\
2415Return 1 if there are only digit characters in S,\n\
24160 otherwise.";
2417
2418static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002419string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002420{
Fred Drakeba096332000-07-09 07:04:36 +00002421 register const unsigned char *p
2422 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002423 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425 /* Shortcut for single character strings */
2426 if (PyString_GET_SIZE(self) == 1 &&
2427 isdigit(*p))
2428 return PyInt_FromLong(1);
2429
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002430 /* Special case for empty strings */
2431 if (PyString_GET_SIZE(self) == 0)
2432 return PyInt_FromLong(0);
2433
Guido van Rossum4c08d552000-03-10 22:55:18 +00002434 e = p + PyString_GET_SIZE(self);
2435 for (; p < e; p++) {
2436 if (!isdigit(*p))
2437 return PyInt_FromLong(0);
2438 }
2439 return PyInt_FromLong(1);
2440}
2441
2442
2443static char islower__doc__[] =
2444"S.islower() -> int\n\
2445\n\
2446Return 1 if all cased characters in S are lowercase and there is\n\
2447at least one cased character in S, 0 otherwise.";
2448
2449static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002450string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002451{
Fred Drakeba096332000-07-09 07:04:36 +00002452 register const unsigned char *p
2453 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002454 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002455 int cased;
2456
Guido van Rossum4c08d552000-03-10 22:55:18 +00002457 /* Shortcut for single character strings */
2458 if (PyString_GET_SIZE(self) == 1)
2459 return PyInt_FromLong(islower(*p) != 0);
2460
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002461 /* Special case for empty strings */
2462 if (PyString_GET_SIZE(self) == 0)
2463 return PyInt_FromLong(0);
2464
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 e = p + PyString_GET_SIZE(self);
2466 cased = 0;
2467 for (; p < e; p++) {
2468 if (isupper(*p))
2469 return PyInt_FromLong(0);
2470 else if (!cased && islower(*p))
2471 cased = 1;
2472 }
2473 return PyInt_FromLong(cased);
2474}
2475
2476
2477static char isupper__doc__[] =
2478"S.isupper() -> int\n\
2479\n\
2480Return 1 if all cased characters in S are uppercase and there is\n\
2481at least one cased character in S, 0 otherwise.";
2482
2483static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002484string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002485{
Fred Drakeba096332000-07-09 07:04:36 +00002486 register const unsigned char *p
2487 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002488 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002489 int cased;
2490
Guido van Rossum4c08d552000-03-10 22:55:18 +00002491 /* Shortcut for single character strings */
2492 if (PyString_GET_SIZE(self) == 1)
2493 return PyInt_FromLong(isupper(*p) != 0);
2494
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002495 /* Special case for empty strings */
2496 if (PyString_GET_SIZE(self) == 0)
2497 return PyInt_FromLong(0);
2498
Guido van Rossum4c08d552000-03-10 22:55:18 +00002499 e = p + PyString_GET_SIZE(self);
2500 cased = 0;
2501 for (; p < e; p++) {
2502 if (islower(*p))
2503 return PyInt_FromLong(0);
2504 else if (!cased && isupper(*p))
2505 cased = 1;
2506 }
2507 return PyInt_FromLong(cased);
2508}
2509
2510
2511static char istitle__doc__[] =
2512"S.istitle() -> int\n\
2513\n\
2514Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2515may only follow uncased characters and lowercase characters only cased\n\
2516ones. Return 0 otherwise.";
2517
2518static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002519string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520{
Fred Drakeba096332000-07-09 07:04:36 +00002521 register const unsigned char *p
2522 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002523 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002524 int cased, previous_is_cased;
2525
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 /* Shortcut for single character strings */
2527 if (PyString_GET_SIZE(self) == 1)
2528 return PyInt_FromLong(isupper(*p) != 0);
2529
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002530 /* Special case for empty strings */
2531 if (PyString_GET_SIZE(self) == 0)
2532 return PyInt_FromLong(0);
2533
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534 e = p + PyString_GET_SIZE(self);
2535 cased = 0;
2536 previous_is_cased = 0;
2537 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002538 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002539
2540 if (isupper(ch)) {
2541 if (previous_is_cased)
2542 return PyInt_FromLong(0);
2543 previous_is_cased = 1;
2544 cased = 1;
2545 }
2546 else if (islower(ch)) {
2547 if (!previous_is_cased)
2548 return PyInt_FromLong(0);
2549 previous_is_cased = 1;
2550 cased = 1;
2551 }
2552 else
2553 previous_is_cased = 0;
2554 }
2555 return PyInt_FromLong(cased);
2556}
2557
2558
2559static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002560"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561\n\
2562Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002563Line breaks are not included in the resulting list unless keepends\n\
2564is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565
2566#define SPLIT_APPEND(data, left, right) \
2567 str = PyString_FromStringAndSize(data + left, right - left); \
2568 if (!str) \
2569 goto onError; \
2570 if (PyList_Append(list, str)) { \
2571 Py_DECREF(str); \
2572 goto onError; \
2573 } \
2574 else \
2575 Py_DECREF(str);
2576
2577static PyObject*
2578string_splitlines(PyStringObject *self, PyObject *args)
2579{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580 register int i;
2581 register int j;
2582 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002583 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584 PyObject *list;
2585 PyObject *str;
2586 char *data;
2587
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002588 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002589 return NULL;
2590
2591 data = PyString_AS_STRING(self);
2592 len = PyString_GET_SIZE(self);
2593
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 list = PyList_New(0);
2595 if (!list)
2596 goto onError;
2597
2598 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002599 int eol;
2600
Guido van Rossum4c08d552000-03-10 22:55:18 +00002601 /* Find a line and append it */
2602 while (i < len && data[i] != '\n' && data[i] != '\r')
2603 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604
2605 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002606 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002607 if (i < len) {
2608 if (data[i] == '\r' && i + 1 < len &&
2609 data[i+1] == '\n')
2610 i += 2;
2611 else
2612 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002613 if (keepends)
2614 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002615 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002616 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617 j = i;
2618 }
2619 if (j < len) {
2620 SPLIT_APPEND(data, j, len);
2621 }
2622
2623 return list;
2624
2625 onError:
2626 Py_DECREF(list);
2627 return NULL;
2628}
2629
2630#undef SPLIT_APPEND
2631
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002632
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002633static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002634string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 /* Counterparts of the obsolete stropmodule functions; except
2636 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002637 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2638 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2639 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2640 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2641 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2642 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2643 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2644 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2645 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2646 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2647 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2648 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2649 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2650 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2651 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2652 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2653 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2654 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2655 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2656 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2657 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2658 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2659 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2660 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2661 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2662 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2663 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2664 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2665 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2666 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2667 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2668 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2669 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002670#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002671 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002673 {NULL, NULL} /* sentinel */
2674};
2675
Guido van Rossumae960af2001-08-30 03:11:59 +00002676staticforward PyObject *
2677str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2678
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002679static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002680string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002681{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002682 PyObject *x = NULL;
2683 static char *kwlist[] = {"object", 0};
2684
Guido van Rossumae960af2001-08-30 03:11:59 +00002685 if (type != &PyString_Type)
2686 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002687 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2688 return NULL;
2689 if (x == NULL)
2690 return PyString_FromString("");
2691 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002692}
2693
Guido van Rossumae960af2001-08-30 03:11:59 +00002694static PyObject *
2695str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2696{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002697 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002698 int n;
2699
2700 assert(PyType_IsSubtype(type, &PyString_Type));
2701 tmp = string_new(&PyString_Type, args, kwds);
2702 if (tmp == NULL)
2703 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002704 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002705 n = PyString_GET_SIZE(tmp);
2706 pnew = type->tp_alloc(type, n);
2707 if (pnew != NULL) {
2708 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2709#ifdef CACHE_HASH
2710 ((PyStringObject *)pnew)->ob_shash =
2711 ((PyStringObject *)tmp)->ob_shash;
2712#endif
2713#ifdef INTERN_STRINGS
2714 ((PyStringObject *)pnew)->ob_sinterned =
2715 ((PyStringObject *)tmp)->ob_sinterned;
2716#endif
2717 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002718 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002719 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002720}
2721
Tim Peters6d6c1a32001-08-02 04:15:00 +00002722static char string_doc[] =
2723"str(object) -> string\n\
2724\n\
2725Return a nice string representation of the object.\n\
2726If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002727
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002728PyTypeObject PyString_Type = {
2729 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002730 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002731 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002732 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002733 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002734 (destructor)string_dealloc, /* tp_dealloc */
2735 (printfunc)string_print, /* tp_print */
2736 0, /* tp_getattr */
2737 0, /* tp_setattr */
2738 0, /* tp_compare */
2739 (reprfunc)string_repr, /* tp_repr */
2740 0, /* tp_as_number */
2741 &string_as_sequence, /* tp_as_sequence */
2742 0, /* tp_as_mapping */
2743 (hashfunc)string_hash, /* tp_hash */
2744 0, /* tp_call */
2745 (reprfunc)string_str, /* tp_str */
2746 PyObject_GenericGetAttr, /* tp_getattro */
2747 0, /* tp_setattro */
2748 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002749 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002750 string_doc, /* tp_doc */
2751 0, /* tp_traverse */
2752 0, /* tp_clear */
2753 (richcmpfunc)string_richcompare, /* tp_richcompare */
2754 0, /* tp_weaklistoffset */
2755 0, /* tp_iter */
2756 0, /* tp_iternext */
2757 string_methods, /* tp_methods */
2758 0, /* tp_members */
2759 0, /* tp_getset */
2760 0, /* tp_base */
2761 0, /* tp_dict */
2762 0, /* tp_descr_get */
2763 0, /* tp_descr_set */
2764 0, /* tp_dictoffset */
2765 0, /* tp_init */
2766 0, /* tp_alloc */
2767 string_new, /* tp_new */
Guido van Rossum9475a232001-10-05 20:51:39 +00002768 _PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002769};
2770
2771void
Fred Drakeba096332000-07-09 07:04:36 +00002772PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002773{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002774 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002775 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002776 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002777 if (w == NULL || !PyString_Check(*pv)) {
2778 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002779 *pv = NULL;
2780 return;
2781 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002782 v = string_concat((PyStringObject *) *pv, w);
2783 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002784 *pv = v;
2785}
2786
Guido van Rossum013142a1994-08-30 08:19:36 +00002787void
Fred Drakeba096332000-07-09 07:04:36 +00002788PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002789{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 PyString_Concat(pv, w);
2791 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002792}
2793
2794
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002795/* The following function breaks the notion that strings are immutable:
2796 it changes the size of a string. We get away with this only if there
2797 is only one module referencing the object. You can also think of it
2798 as creating a new string object and destroying the old one, only
2799 more efficiently. In any case, don't use this if the string may
2800 already be known to some other part of the code... */
2801
2802int
Fred Drakeba096332000-07-09 07:04:36 +00002803_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002804{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002805 register PyObject *v;
2806 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002807 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002808 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002809 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002810 Py_DECREF(v);
2811 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002812 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002813 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002814 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002815#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002816 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002817#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818 _Py_ForgetReference(v);
2819 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002820 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002821 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002822 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002823 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002825 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002826 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002827 _Py_NewReference(*pv);
2828 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002829 sv->ob_size = newsize;
2830 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002831 return 0;
2832}
Guido van Rossume5372401993-03-16 12:15:04 +00002833
2834/* Helpers for formatstring */
2835
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002836static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002837getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002838{
2839 int argidx = *p_argidx;
2840 if (argidx < arglen) {
2841 (*p_argidx)++;
2842 if (arglen < 0)
2843 return args;
2844 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002845 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002846 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002847 PyErr_SetString(PyExc_TypeError,
2848 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002849 return NULL;
2850}
2851
/* Bit flags recording which flag characters were seen in a format
   specifier; each comment gives the corresponding character. */
#define F_LJUST (1<<0)	/* '-' */
#define F_SIGN	(1<<1)	/* '+' */
#define F_BLANK (1<<2)	/* ' ' */
#define F_ALT	(1<<3)	/* '#' */
#define F_ZERO	(1<<4)	/* '0' */
2864
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002865static int
Fred Drakeba096332000-07-09 07:04:36 +00002866formatfloat(char *buf, size_t buflen, int flags,
2867 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002868{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002869 /* fmt = '%#.' + `prec` + `type`
2870 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002871 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002872 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002873 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002874 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002875 if (prec < 0)
2876 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002877 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2878 type = 'g';
2879 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002880 /* worst case length calc to ensure no buffer overrun:
2881 fmt = %#.<prec>g
2882 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002883 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002884 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2885 If prec=0 the effective precision is 1 (the leading digit is
2886 always given), therefore increase by one to 10+prec. */
2887 if (buflen <= (size_t)10 + (size_t)prec) {
2888 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002889 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002890 return -1;
2891 }
Guido van Rossume5372401993-03-16 12:15:04 +00002892 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002893 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002894}
2895
Tim Peters38fd5b62000-09-21 05:43:11 +00002896/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2897 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2898 * Python's regular ints.
2899 * Return value: a new PyString*, or NULL if error.
2900 * . *pbuf is set to point into it,
2901 * *plen set to the # of chars following that.
2902 * Caller must decref it when done using pbuf.
2903 * The string starting at *pbuf is of the form
2904 * "-"? ("0x" | "0X")? digit+
2905 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002906 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002907 * There will be at least prec digits, zero-filled on the left if
2908 * necessary to get that many.
2909 * val object to be converted
2910 * flags bitmask of format flags; only F_ALT is looked at
2911 * prec minimum number of digits; 0-fill on left if needed
2912 * type a character in [duoxX]; u acts the same as d
2913 *
2914 * CAUTION: o, x and X conversions on regular ints can never
2915 * produce a '-' sign, but can for Python's unbounded ints.
2916 */
2917PyObject*
2918_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2919 char **pbuf, int *plen)
2920{
2921 PyObject *result = NULL;
2922 char *buf;
2923 int i;
2924 int sign; /* 1 if '-', else 0 */
2925 int len; /* number of characters */
2926 int numdigits; /* len == numnondigits + numdigits */
2927 int numnondigits = 0;
2928
2929 switch (type) {
2930 case 'd':
2931 case 'u':
2932 result = val->ob_type->tp_str(val);
2933 break;
2934 case 'o':
2935 result = val->ob_type->tp_as_number->nb_oct(val);
2936 break;
2937 case 'x':
2938 case 'X':
2939 numnondigits = 2;
2940 result = val->ob_type->tp_as_number->nb_hex(val);
2941 break;
2942 default:
2943 assert(!"'type' not in [duoxX]");
2944 }
2945 if (!result)
2946 return NULL;
2947
2948 /* To modify the string in-place, there can only be one reference. */
2949 if (result->ob_refcnt != 1) {
2950 PyErr_BadInternalCall();
2951 return NULL;
2952 }
2953 buf = PyString_AsString(result);
2954 len = PyString_Size(result);
2955 if (buf[len-1] == 'L') {
2956 --len;
2957 buf[len] = '\0';
2958 }
2959 sign = buf[0] == '-';
2960 numnondigits += sign;
2961 numdigits = len - numnondigits;
2962 assert(numdigits > 0);
2963
Tim Petersfff53252001-04-12 18:38:48 +00002964 /* Get rid of base marker unless F_ALT */
2965 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002966 /* Need to skip 0x, 0X or 0. */
2967 int skipped = 0;
2968 switch (type) {
2969 case 'o':
2970 assert(buf[sign] == '0');
2971 /* If 0 is only digit, leave it alone. */
2972 if (numdigits > 1) {
2973 skipped = 1;
2974 --numdigits;
2975 }
2976 break;
2977 case 'x':
2978 case 'X':
2979 assert(buf[sign] == '0');
2980 assert(buf[sign + 1] == 'x');
2981 skipped = 2;
2982 numnondigits -= 2;
2983 break;
2984 }
2985 if (skipped) {
2986 buf += skipped;
2987 len -= skipped;
2988 if (sign)
2989 buf[0] = '-';
2990 }
2991 assert(len == numnondigits + numdigits);
2992 assert(numdigits > 0);
2993 }
2994
2995 /* Fill with leading zeroes to meet minimum width. */
2996 if (prec > numdigits) {
2997 PyObject *r1 = PyString_FromStringAndSize(NULL,
2998 numnondigits + prec);
2999 char *b1;
3000 if (!r1) {
3001 Py_DECREF(result);
3002 return NULL;
3003 }
3004 b1 = PyString_AS_STRING(r1);
3005 for (i = 0; i < numnondigits; ++i)
3006 *b1++ = *buf++;
3007 for (i = 0; i < prec - numdigits; i++)
3008 *b1++ = '0';
3009 for (i = 0; i < numdigits; i++)
3010 *b1++ = *buf++;
3011 *b1 = '\0';
3012 Py_DECREF(result);
3013 result = r1;
3014 buf = PyString_AS_STRING(result);
3015 len = numnondigits + prec;
3016 }
3017
3018 /* Fix up case for hex conversions. */
3019 switch (type) {
3020 case 'x':
3021 /* Need to convert all upper case letters to lower case. */
3022 for (i = 0; i < len; i++)
3023 if (buf[i] >= 'A' && buf[i] <= 'F')
3024 buf[i] += 'a'-'A';
3025 break;
3026 case 'X':
3027 /* Need to convert 0x to 0X (and -0x to -0X). */
3028 if (buf[sign + 1] == 'x')
3029 buf[sign + 1] = 'X';
3030 break;
3031 }
3032 *pbuf = buf;
3033 *plen = len;
3034 return result;
3035}
3036
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003037static int
Fred Drakeba096332000-07-09 07:04:36 +00003038formatint(char *buf, size_t buflen, int flags,
3039 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003040{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003041 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003042 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3043 + 1 + 1 = 24 */
3044 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003045 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003046 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003047 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003048 if (prec < 0)
3049 prec = 1;
3050 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003051 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003052 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003053 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003054 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003055 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003056 return -1;
3057 }
Guido van Rossume5372401993-03-16 12:15:04 +00003058 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003059 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3060 * but we want it (for consistency with other %#x conversions, and
3061 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003062 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3063 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3064 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003065 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003066 if (x == 0 &&
3067 (flags & F_ALT) &&
3068 (type == 'x' || type == 'X') &&
3069 buf[1] != (char)type) /* this last always true under std C */
3070 {
Tim Petersfff53252001-04-12 18:38:48 +00003071 memmove(buf+2, buf, strlen(buf) + 1);
3072 buf[0] = '0';
3073 buf[1] = (char)type;
3074 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003075 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003076}
3077
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003078static int
Fred Drakeba096332000-07-09 07:04:36 +00003079formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003080{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003081 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003082 if (PyString_Check(v)) {
3083 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003084 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003085 }
3086 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003087 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003088 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003089 }
3090 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003091 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003092}
3093
Guido van Rossum013142a1994-08-30 08:19:36 +00003094
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast stays attached
   to the constant wherever the macro is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003104
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003106PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003107{
3108 char *fmt, *res;
3109 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003110 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003111 PyObject *result, *orig_args;
3112#ifdef Py_USING_UNICODE
3113 PyObject *v, *w;
3114#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003115 PyObject *dict = NULL;
3116 if (format == NULL || !PyString_Check(format) || args == NULL) {
3117 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003118 return NULL;
3119 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003120 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003121 fmt = PyString_AsString(format);
3122 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003123 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003124 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003125 if (result == NULL)
3126 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003127 res = PyString_AsString(result);
3128 if (PyTuple_Check(args)) {
3129 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003130 argidx = 0;
3131 }
3132 else {
3133 arglen = -1;
3134 argidx = -2;
3135 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003136 if (args->ob_type->tp_as_mapping)
3137 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003138 while (--fmtcnt >= 0) {
3139 if (*fmt != '%') {
3140 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003141 rescnt = fmtcnt + 100;
3142 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003143 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003144 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003145 res = PyString_AsString(result)
3146 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003147 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003148 }
3149 *res++ = *fmt++;
3150 }
3151 else {
3152 /* Got a format specifier */
3153 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003154 int width = -1;
3155 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003156 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003157 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003158 PyObject *v = NULL;
3159 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003160 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003161 int sign;
3162 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003163 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003164#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003165 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003166 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003167#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003168
Guido van Rossumda9c2711996-12-05 21:58:58 +00003169 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003170 if (*fmt == '(') {
3171 char *keystart;
3172 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003173 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003174 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003175
3176 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003177 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003178 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003179 goto error;
3180 }
3181 ++fmt;
3182 --fmtcnt;
3183 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003184 /* Skip over balanced parentheses */
3185 while (pcount > 0 && --fmtcnt >= 0) {
3186 if (*fmt == ')')
3187 --pcount;
3188 else if (*fmt == '(')
3189 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003190 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003191 }
3192 keylen = fmt - keystart - 1;
3193 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003194 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003195 "incomplete format key");
3196 goto error;
3197 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003198 key = PyString_FromStringAndSize(keystart,
3199 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003200 if (key == NULL)
3201 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003202 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003203 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003204 args_owned = 0;
3205 }
3206 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003207 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003208 if (args == NULL) {
3209 goto error;
3210 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003211 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003212 arglen = -1;
3213 argidx = -2;
3214 }
Guido van Rossume5372401993-03-16 12:15:04 +00003215 while (--fmtcnt >= 0) {
3216 switch (c = *fmt++) {
3217 case '-': flags |= F_LJUST; continue;
3218 case '+': flags |= F_SIGN; continue;
3219 case ' ': flags |= F_BLANK; continue;
3220 case '#': flags |= F_ALT; continue;
3221 case '0': flags |= F_ZERO; continue;
3222 }
3223 break;
3224 }
3225 if (c == '*') {
3226 v = getnextarg(args, arglen, &argidx);
3227 if (v == NULL)
3228 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003229 if (!PyInt_Check(v)) {
3230 PyErr_SetString(PyExc_TypeError,
3231 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003232 goto error;
3233 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003234 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003235 if (width < 0) {
3236 flags |= F_LJUST;
3237 width = -width;
3238 }
Guido van Rossume5372401993-03-16 12:15:04 +00003239 if (--fmtcnt >= 0)
3240 c = *fmt++;
3241 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003242 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003243 width = c - '0';
3244 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003245 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003246 if (!isdigit(c))
3247 break;
3248 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003249 PyErr_SetString(
3250 PyExc_ValueError,
3251 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003252 goto error;
3253 }
3254 width = width*10 + (c - '0');
3255 }
3256 }
3257 if (c == '.') {
3258 prec = 0;
3259 if (--fmtcnt >= 0)
3260 c = *fmt++;
3261 if (c == '*') {
3262 v = getnextarg(args, arglen, &argidx);
3263 if (v == NULL)
3264 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003265 if (!PyInt_Check(v)) {
3266 PyErr_SetString(
3267 PyExc_TypeError,
3268 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003269 goto error;
3270 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003271 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003272 if (prec < 0)
3273 prec = 0;
3274 if (--fmtcnt >= 0)
3275 c = *fmt++;
3276 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003277 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003278 prec = c - '0';
3279 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003280 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003281 if (!isdigit(c))
3282 break;
3283 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003284 PyErr_SetString(
3285 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003286 "prec too big");
3287 goto error;
3288 }
3289 prec = prec*10 + (c - '0');
3290 }
3291 }
3292 } /* prec */
3293 if (fmtcnt >= 0) {
3294 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003295 if (--fmtcnt >= 0)
3296 c = *fmt++;
3297 }
3298 }
3299 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003300 PyErr_SetString(PyExc_ValueError,
3301 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003302 goto error;
3303 }
3304 if (c != '%') {
3305 v = getnextarg(args, arglen, &argidx);
3306 if (v == NULL)
3307 goto error;
3308 }
3309 sign = 0;
3310 fill = ' ';
3311 switch (c) {
3312 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003313 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003314 len = 1;
3315 break;
3316 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003317 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003318#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003319 if (PyUnicode_Check(v)) {
3320 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003321 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003322 goto unicode;
3323 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003324#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003325 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003326 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003327 else
3328 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003329 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003330 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003331 if (!PyString_Check(temp)) {
3332 PyErr_SetString(PyExc_TypeError,
3333 "%s argument has non-string str()");
3334 goto error;
3335 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003336 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003337 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003338 if (prec >= 0 && len > prec)
3339 len = prec;
3340 break;
3341 case 'i':
3342 case 'd':
3343 case 'u':
3344 case 'o':
3345 case 'x':
3346 case 'X':
3347 if (c == 'i')
3348 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003349 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003350 temp = _PyString_FormatLong(v, flags,
3351 prec, c, &pbuf, &len);
3352 if (!temp)
3353 goto error;
3354 /* unbounded ints can always produce
3355 a sign character! */
3356 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003357 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003358 else {
3359 pbuf = formatbuf;
3360 len = formatint(pbuf, sizeof(formatbuf),
3361 flags, prec, c, v);
3362 if (len < 0)
3363 goto error;
3364 /* only d conversion is signed */
3365 sign = c == 'd';
3366 }
3367 if (flags & F_ZERO)
3368 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003369 break;
3370 case 'e':
3371 case 'E':
3372 case 'f':
3373 case 'g':
3374 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003375 pbuf = formatbuf;
3376 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003377 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003378 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003379 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003380 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003381 fill = '0';
3382 break;
3383 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003384 pbuf = formatbuf;
3385 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003386 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003387 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003388 break;
3389 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003390 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003391 "unsupported format character '%c' (0x%x) "
3392 "at index %i",
3393 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003394 goto error;
3395 }
3396 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003397 if (*pbuf == '-' || *pbuf == '+') {
3398 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003399 len--;
3400 }
3401 else if (flags & F_SIGN)
3402 sign = '+';
3403 else if (flags & F_BLANK)
3404 sign = ' ';
3405 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003406 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003407 }
3408 if (width < len)
3409 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003410 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003411 reslen -= rescnt;
3412 rescnt = width + fmtcnt + 100;
3413 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003414 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003415 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003416 res = PyString_AsString(result)
3417 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003418 }
3419 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003420 if (fill != ' ')
3421 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003422 rescnt--;
3423 if (width > len)
3424 width--;
3425 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003426 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3427 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003428 assert(pbuf[1] == c);
3429 if (fill != ' ') {
3430 *res++ = *pbuf++;
3431 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003432 }
Tim Petersfff53252001-04-12 18:38:48 +00003433 rescnt -= 2;
3434 width -= 2;
3435 if (width < 0)
3436 width = 0;
3437 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003438 }
3439 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003440 do {
3441 --rescnt;
3442 *res++ = fill;
3443 } while (--width > len);
3444 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003445 if (fill == ' ') {
3446 if (sign)
3447 *res++ = sign;
3448 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003449 (c == 'x' || c == 'X')) {
3450 assert(pbuf[0] == '0');
3451 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003452 *res++ = *pbuf++;
3453 *res++ = *pbuf++;
3454 }
3455 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003456 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003457 res += len;
3458 rescnt -= len;
3459 while (--width >= len) {
3460 --rescnt;
3461 *res++ = ' ';
3462 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003463 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003464 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003465 "not all arguments converted");
3466 goto error;
3467 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003468 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003469 } /* '%' */
3470 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003471 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003472 PyErr_SetString(PyExc_TypeError,
3473 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003474 goto error;
3475 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003476 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003477 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003478 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003479 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003480 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003481
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003482#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003483 unicode:
3484 if (args_owned) {
3485 Py_DECREF(args);
3486 args_owned = 0;
3487 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003488 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003489 if (PyTuple_Check(orig_args) && argidx > 0) {
3490 PyObject *v;
3491 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3492 v = PyTuple_New(n);
3493 if (v == NULL)
3494 goto error;
3495 while (--n >= 0) {
3496 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3497 Py_INCREF(w);
3498 PyTuple_SET_ITEM(v, n, w);
3499 }
3500 args = v;
3501 } else {
3502 Py_INCREF(orig_args);
3503 args = orig_args;
3504 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003505 args_owned = 1;
3506 /* Take what we have of the result and let the Unicode formatting
3507 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003508 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003509 if (_PyString_Resize(&result, rescnt))
3510 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003511 fmtcnt = PyString_GET_SIZE(format) - \
3512 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003513 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3514 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003515 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003516 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003517 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003518 if (v == NULL)
3519 goto error;
3520 /* Paste what we have (result) to what the Unicode formatting
3521 function returned (v) and return the result (or error) */
3522 w = PyUnicode_Concat(result, v);
3523 Py_DECREF(result);
3524 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003525 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003526 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003527#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003528
Guido van Rossume5372401993-03-16 12:15:04 +00003529 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003530 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003531 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003532 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003533 }
Guido van Rossume5372401993-03-16 12:15:04 +00003534 return NULL;
3535}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003536
3537
3538#ifdef INTERN_STRINGS
3539
Barry Warsaw4df762f2000-08-16 23:41:01 +00003540/* This dictionary will leak at PyString_Fini() time. That's acceptable
3541 * because PyString_Fini() specifically frees interned strings that are
3542 * only referenced by this dictionary. The CVS log entry for revision 2.45
3543 * says:
3544 *
3545 * Change the Fini function to only remove otherwise unreferenced
3546 * strings from the interned table. There are references in
3547 * hard-to-find static variables all over the interpreter, and it's not
3548 * worth trying to get rid of all those; but "uninterning" isn't fair
3549 * either and may cause subtle failures later -- so we have to keep them
3550 * in the interned table.
3551 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003552static PyObject *interned;
3553
3554void
Fred Drakeba096332000-07-09 07:04:36 +00003555PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003556{
3557 register PyStringObject *s = (PyStringObject *)(*p);
3558 PyObject *t;
3559 if (s == NULL || !PyString_Check(s))
3560 Py_FatalError("PyString_InternInPlace: strings only please!");
3561 if ((t = s->ob_sinterned) != NULL) {
3562 if (t == (PyObject *)s)
3563 return;
3564 Py_INCREF(t);
3565 *p = t;
3566 Py_DECREF(s);
3567 return;
3568 }
3569 if (interned == NULL) {
3570 interned = PyDict_New();
3571 if (interned == NULL)
3572 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003573 }
3574 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3575 Py_INCREF(t);
3576 *p = s->ob_sinterned = t;
3577 Py_DECREF(s);
3578 return;
3579 }
Tim Peters111f6092001-09-12 07:54:51 +00003580 /* Ensure that only true string objects appear in the intern dict,
3581 and as the value of ob_sinterned. */
3582 if (PyString_CheckExact(s)) {
3583 t = (PyObject *)s;
3584 if (PyDict_SetItem(interned, t, t) == 0) {
3585 s->ob_sinterned = t;
3586 return;
3587 }
3588 }
3589 else {
3590 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3591 PyString_GET_SIZE(s));
3592 if (t != NULL) {
3593 if (PyDict_SetItem(interned, t, t) == 0) {
3594 *p = s->ob_sinterned = t;
3595 Py_DECREF(s);
3596 return;
3597 }
3598 Py_DECREF(t);
3599 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003600 }
3601 PyErr_Clear();
3602}
3603
3604
3605PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003606PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003607{
3608 PyObject *s = PyString_FromString(cp);
3609 if (s == NULL)
3610 return NULL;
3611 PyString_InternInPlace(&s);
3612 return s;
3613}
3614
3615#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003616
3617void
Fred Drakeba096332000-07-09 07:04:36 +00003618PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003619{
3620 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003621 for (i = 0; i < UCHAR_MAX + 1; i++) {
3622 Py_XDECREF(characters[i]);
3623 characters[i] = NULL;
3624 }
3625#ifndef DONT_SHARE_SHORT_STRINGS
3626 Py_XDECREF(nullstring);
3627 nullstring = NULL;
3628#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003629#ifdef INTERN_STRINGS
3630 if (interned) {
3631 int pos, changed;
3632 PyObject *key, *value;
3633 do {
3634 changed = 0;
3635 pos = 0;
3636 while (PyDict_Next(interned, &pos, &key, &value)) {
3637 if (key->ob_refcnt == 2 && key == value) {
3638 PyDict_DelItem(interned, key);
3639 changed = 1;
3640 }
3641 }
3642 } while (changed);
3643 }
3644#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003645}
Barry Warsawa903ad982001-02-23 16:40:48 +00003646
3647#ifdef INTERN_STRINGS
3648void _Py_ReleaseInternedStrings(void)
3649{
3650 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003651 fprintf(stderr, "releasing interned strings\n");
3652 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003653 Py_DECREF(interned);
3654 interned = NULL;
3655 }
3656}
3657#endif /* INTERN_STRINGS */