blob: aea31aca6b1a0f6d59b4781b9abe5feda7a0487d [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
#ifdef COUNT_ALLOCS
/* Statistics: incremented each time a shared empty string
   (null_strings) or a shared one-character string (one_strings) is
   handed out in place of a freshly allocated object. */
int null_strings, one_strings;
#endif

/* Fallback definition for builds where <limits.h> did not supply one. */
#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
#define UCHAR_MAX 255
#endif

/* Cache of one-character string objects, indexed by byte value
   (`c & UCHAR_MAX`).  A slot is NULL until the corresponding string has
   been created. */
static PyStringObject *characters[UCHAR_MAX + 1];
#ifndef DONT_SHARE_SHORT_STRINGS
/* The shared empty string; NULL until it is first created. */
static PyStringObject *nullstring;
#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
Tim Petersc15c4f12001-10-02 21:32:07 +0000153 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
Tim Petersc15c4f12001-10-02 21:32:07 +0000159#ifdef VA_LIST_IS_ARRAY
160 memcpy(count, vargs, sizeof(va_list));
161#else
162 count = vargs;
163#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000164 /* step 1: figure out how large a buffer we need */
165 for (f = format; *f; f++) {
166 if (*f == '%') {
167 const char* p = f;
168 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
169 ;
170
171 /* skip the 'l' in %ld, since it doesn't change the
172 width. although only %d is supported (see
173 "expand" section below), others can be easily
174 add */
175 if (*f == 'l' && *(f+1) == 'd')
176 ++f;
177
178 switch (*f) {
179 case 'c':
180 (void)va_arg(count, int);
181 /* fall through... */
182 case '%':
183 n++;
184 break;
185 case 'd': case 'i': case 'x':
186 (void) va_arg(count, int);
187 /* 20 bytes should be enough to hold a 64-bit
188 integer */
189 n += 20;
190 break;
191 case 's':
192 s = va_arg(count, char*);
193 n += strlen(s);
194 break;
195 case 'p':
196 (void) va_arg(count, int);
197 /* maximum 64-bit pointer representation:
198 * 0xffffffffffffffff
199 * so 19 characters is enough.
200 */
201 n += 19;
202 break;
203 default:
204 /* if we stumble upon an unknown
205 formatting code, copy the rest of
206 the format string to the output
207 string. (we cannot just skip the
208 code, since there's no way to know
209 what's in the argument list) */
210 n += strlen(p);
211 goto expand;
212 }
213 } else
214 n++;
215 }
216 expand:
217 /* step 2: fill the buffer */
218 string = PyString_FromStringAndSize(NULL, n);
219 if (!string)
220 return NULL;
221
222 s = PyString_AsString(string);
223
224 for (f = format; *f; f++) {
225 if (*f == '%') {
226 const char* p = f++;
227 int i, longflag = 0;
228 /* parse the width.precision part (we're only
229 interested in the precision value, if any) */
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 if (*f == '.') {
234 f++;
235 n = 0;
236 while (isdigit(Py_CHARMASK(*f)))
237 n = (n*10) + *f++ - '0';
238 }
239 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
240 f++;
241 /* handle the long flag, but only for %ld. others
242 can be added when necessary. */
243 if (*f == 'l' && *(f+1) == 'd') {
244 longflag = 1;
245 ++f;
246 }
247
248 switch (*f) {
249 case 'c':
250 *s++ = va_arg(vargs, int);
251 break;
252 case 'd':
253 if (longflag)
254 sprintf(s, "%ld", va_arg(vargs, long));
255 else
256 sprintf(s, "%d", va_arg(vargs, int));
257 s += strlen(s);
258 break;
259 case 'i':
260 sprintf(s, "%i", va_arg(vargs, int));
261 s += strlen(s);
262 break;
263 case 'x':
264 sprintf(s, "%x", va_arg(vargs, int));
265 s += strlen(s);
266 break;
267 case 's':
268 p = va_arg(vargs, char*);
269 i = strlen(p);
270 if (n > 0 && i > n)
271 i = n;
272 memcpy(s, p, i);
273 s += i;
274 break;
275 case 'p':
276 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000277 /* %p is ill-defined: ensure leading 0x. */
278 if (s[1] == 'X')
279 s[1] = 'x';
280 else if (s[1] != 'x') {
281 memmove(s+2, s, strlen(s)+1);
282 s[0] = '0';
283 s[1] = 'x';
284 }
Barry Warsawdadace02001-08-24 18:32:06 +0000285 s += strlen(s);
286 break;
287 case '%':
288 *s++ = '%';
289 break;
290 default:
291 strcpy(s, p);
292 s += strlen(s);
293 goto end;
294 }
295 } else
296 *s++ = *f;
297 }
298
299 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000300 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000301 return string;
302}
303
304PyObject *
305PyString_FromFormat(const char *format, ...)
306{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000307 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000308 va_list vargs;
309
310#ifdef HAVE_STDARG_PROTOTYPES
311 va_start(vargs, format);
312#else
313 va_start(vargs);
314#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000315 ret = PyString_FromFormatV(format, vargs);
316 va_end(vargs);
317 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000318}
319
320
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000321PyObject *PyString_Decode(const char *s,
322 int size,
323 const char *encoding,
324 const char *errors)
325{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000326 PyObject *v, *str;
327
328 str = PyString_FromStringAndSize(s, size);
329 if (str == NULL)
330 return NULL;
331 v = PyString_AsDecodedString(str, encoding, errors);
332 Py_DECREF(str);
333 return v;
334}
335
336PyObject *PyString_AsDecodedObject(PyObject *str,
337 const char *encoding,
338 const char *errors)
339{
340 PyObject *v;
341
342 if (!PyString_Check(str)) {
343 PyErr_BadArgument();
344 goto onError;
345 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000346
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000347 if (encoding == NULL) {
348#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000349 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000350#else
351 PyErr_SetString(PyExc_ValueError, "no encoding specified");
352 goto onError;
353#endif
354 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355
356 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000357 v = PyCodec_Decode(str, encoding, errors);
358 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000359 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360
361 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000363 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000364 return NULL;
365}
366
367PyObject *PyString_AsDecodedString(PyObject *str,
368 const char *encoding,
369 const char *errors)
370{
371 PyObject *v;
372
373 v = PyString_AsDecodedObject(str, encoding, errors);
374 if (v == NULL)
375 goto onError;
376
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000377#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000378 /* Convert Unicode to a string using the default encoding */
379 if (PyUnicode_Check(v)) {
380 PyObject *temp = v;
381 v = PyUnicode_AsEncodedString(v, NULL, NULL);
382 Py_DECREF(temp);
383 if (v == NULL)
384 goto onError;
385 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000386#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000387 if (!PyString_Check(v)) {
388 PyErr_Format(PyExc_TypeError,
389 "decoder did not return a string object (type=%.400s)",
390 v->ob_type->tp_name);
391 Py_DECREF(v);
392 goto onError;
393 }
394
395 return v;
396
397 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 return NULL;
399}
400
401PyObject *PyString_Encode(const char *s,
402 int size,
403 const char *encoding,
404 const char *errors)
405{
406 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000407
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000408 str = PyString_FromStringAndSize(s, size);
409 if (str == NULL)
410 return NULL;
411 v = PyString_AsEncodedString(str, encoding, errors);
412 Py_DECREF(str);
413 return v;
414}
415
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000416PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 const char *encoding,
418 const char *errors)
419{
420 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000421
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 if (!PyString_Check(str)) {
423 PyErr_BadArgument();
424 goto onError;
425 }
426
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000427 if (encoding == NULL) {
428#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000429 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000430#else
431 PyErr_SetString(PyExc_ValueError, "no encoding specified");
432 goto onError;
433#endif
434 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000435
436 /* Encode via the codec registry */
437 v = PyCodec_Encode(str, encoding, errors);
438 if (v == NULL)
439 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000440
441 return v;
442
443 onError:
444 return NULL;
445}
446
447PyObject *PyString_AsEncodedString(PyObject *str,
448 const char *encoding,
449 const char *errors)
450{
451 PyObject *v;
452
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000453 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000454 if (v == NULL)
455 goto onError;
456
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000457#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000458 /* Convert Unicode to a string using the default encoding */
459 if (PyUnicode_Check(v)) {
460 PyObject *temp = v;
461 v = PyUnicode_AsEncodedString(v, NULL, NULL);
462 Py_DECREF(temp);
463 if (v == NULL)
464 goto onError;
465 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000466#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000467 if (!PyString_Check(v)) {
468 PyErr_Format(PyExc_TypeError,
469 "encoder did not return a string object (type=%.400s)",
470 v->ob_type->tp_name);
471 Py_DECREF(v);
472 goto onError;
473 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000476
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000477 onError:
478 return NULL;
479}
480
Guido van Rossum234f9421993-06-17 12:35:49 +0000481static void
Fred Drakeba096332000-07-09 07:04:36 +0000482string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000483{
Guido van Rossum9475a232001-10-05 20:51:39 +0000484 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000485}
486
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000487static int
488string_getsize(register PyObject *op)
489{
490 char *s;
491 int len;
492 if (PyString_AsStringAndSize(op, &s, &len))
493 return -1;
494 return len;
495}
496
497static /*const*/ char *
498string_getbuffer(register PyObject *op)
499{
500 char *s;
501 int len;
502 if (PyString_AsStringAndSize(op, &s, &len))
503 return NULL;
504 return s;
505}
506
Guido van Rossumd7047b31995-01-02 19:07:15 +0000507int
Fred Drakeba096332000-07-09 07:04:36 +0000508PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000510 if (!PyString_Check(op))
511 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000512 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513}
514
515/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000516PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518 if (!PyString_Check(op))
519 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000520 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521}
522
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000523int
524PyString_AsStringAndSize(register PyObject *obj,
525 register char **s,
526 register int *len)
527{
528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
532
533 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000534#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000535 if (PyUnicode_Check(obj)) {
536 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
537 if (obj == NULL)
538 return -1;
539 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000540 else
541#endif
542 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543 PyErr_Format(PyExc_TypeError,
544 "expected string or Unicode object, "
545 "%.200s found", obj->ob_type->tp_name);
546 return -1;
547 }
548 }
549
550 *s = PyString_AS_STRING(obj);
551 if (len != NULL)
552 *len = PyString_GET_SIZE(obj);
553 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
554 PyErr_SetString(PyExc_TypeError,
555 "expected string without null bytes");
556 return -1;
557 }
558 return 0;
559}
560
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000561/* Methods */
562
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000563static int
Fred Drakeba096332000-07-09 07:04:36 +0000564string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000565{
566 int i;
567 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000568 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000569 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000570 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000571 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000572 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000574
Thomas Wouters7e474022000-07-16 12:04:32 +0000575 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000576 quote = '\'';
577 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
578 quote = '"';
579
580 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000581 for (i = 0; i < op->ob_size; i++) {
582 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000583 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000584 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000585 else if (c == '\t')
586 fprintf(fp, "\\t");
587 else if (c == '\n')
588 fprintf(fp, "\\n");
589 else if (c == '\r')
590 fprintf(fp, "\\r");
591 else if (c < ' ' || c >= 0x7f)
592 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000594 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000595 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000596 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000597 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000598}
599
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000600static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000601string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000602{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000603 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
604 PyObject *v;
605 if (newsize > INT_MAX) {
606 PyErr_SetString(PyExc_OverflowError,
607 "string is too large to make repr");
608 }
609 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000611 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000612 }
613 else {
614 register int i;
615 register char c;
616 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000617 int quote;
618
Thomas Wouters7e474022000-07-16 12:04:32 +0000619 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 quote = '\'';
621 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
622 quote = '"';
623
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000624 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000625 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000626 for (i = 0; i < op->ob_size; i++) {
627 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000628 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000630 else if (c == '\t')
631 *p++ = '\\', *p++ = 't';
632 else if (c == '\n')
633 *p++ = '\\', *p++ = 'n';
634 else if (c == '\r')
635 *p++ = '\\', *p++ = 'r';
636 else if (c < ' ' || c >= 0x7f) {
637 sprintf(p, "\\x%02x", c & 0xff);
638 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 }
640 else
641 *p++ = c;
642 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000643 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000645 _PyString_Resize(
646 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000647 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649}
650
Guido van Rossum189f1df2001-05-01 16:51:53 +0000651static PyObject *
652string_str(PyObject *s)
653{
654 Py_INCREF(s);
655 return s;
656}
657
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000658static int
Fred Drakeba096332000-07-09 07:04:36 +0000659string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000660{
661 return a->ob_size;
662}
663
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000664static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000665string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000666{
667 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000668 register PyStringObject *op;
669 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000670#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000671 if (PyUnicode_Check(bb))
672 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000673#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000674 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000675 "cannot add type \"%.200s\" to string",
676 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000677 return NULL;
678 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000679#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000680 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000681 if ((a->ob_size == 0 || b->ob_size == 0) &&
682 PyString_CheckExact(a) && PyString_CheckExact(b)) {
683 if (a->ob_size == 0) {
684 Py_INCREF(bb);
685 return bb;
686 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000687 Py_INCREF(a);
688 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689 }
690 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000691 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000693 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000694 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000695 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000696 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000697#ifdef CACHE_HASH
698 op->ob_shash = -1;
699#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000700#ifdef INTERN_STRINGS
701 op->ob_sinterned = NULL;
702#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000703 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
704 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
705 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707#undef b
708}
709
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000710static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000711string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712{
713 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000714 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000715 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000716 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717 if (n < 0)
718 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000719 /* watch out for overflows: the size can overflow int,
720 * and the # of bytes needed can overflow size_t
721 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000723 if (n && size / n != a->ob_size) {
724 PyErr_SetString(PyExc_OverflowError,
725 "repeated string is too long");
726 return NULL;
727 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000728 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 Py_INCREF(a);
730 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731 }
Tim Peters8f422462000-09-09 06:13:41 +0000732 nbytes = size * sizeof(char);
733 if (nbytes / sizeof(char) != (size_t)size ||
734 nbytes + sizeof(PyStringObject) <= nbytes) {
735 PyErr_SetString(PyExc_OverflowError,
736 "repeated string is too long");
737 return NULL;
738 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000739 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000740 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000741 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000742 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000743 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000744#ifdef CACHE_HASH
745 op->ob_shash = -1;
746#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000747#ifdef INTERN_STRINGS
748 op->ob_sinterned = NULL;
749#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000750 for (i = 0; i < size; i += a->ob_size)
751 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
752 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000753 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000754}
755
756/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
757
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000758static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000759string_slice(register PyStringObject *a, register int i, register int j)
760 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000761{
762 if (i < 0)
763 i = 0;
764 if (j < 0)
765 j = 0; /* Avoid signed/unsigned bug in next line */
766 if (j > a->ob_size)
767 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000768 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
769 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000770 Py_INCREF(a);
771 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000772 }
773 if (j < i)
774 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000775 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000776}
777
Guido van Rossum9284a572000-03-07 15:53:43 +0000778static int
Fred Drakeba096332000-07-09 07:04:36 +0000779string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000780{
781 register char *s, *end;
782 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000783#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000784 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000785 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000786#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000787 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000788 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000789 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000790 return -1;
791 }
792 c = PyString_AsString(el)[0];
793 s = PyString_AsString(a);
794 end = s + PyString_Size(a);
795 while (s < end) {
796 if (c == *s++)
797 return 1;
798 }
799 return 0;
800}
801
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000802static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000803string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000805 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000806 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 return NULL;
810 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000811 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000812 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000813 if (v == NULL)
814 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000815 else {
816#ifdef COUNT_ALLOCS
817 one_strings++;
818#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000819 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000820 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000821 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822}
823
Martin v. Löwiscd353062001-05-24 16:56:35 +0000824static PyObject*
825string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000827 int c;
828 int len_a, len_b;
829 int min_len;
830 PyObject *result;
831
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000832 /* Make sure both arguments are strings. */
833 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000834 result = Py_NotImplemented;
835 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000836 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000837 if (a == b) {
838 switch (op) {
839 case Py_EQ:case Py_LE:case Py_GE:
840 result = Py_True;
841 goto out;
842 case Py_NE:case Py_LT:case Py_GT:
843 result = Py_False;
844 goto out;
845 }
846 }
847 if (op == Py_EQ) {
848 /* Supporting Py_NE here as well does not save
849 much time, since Py_NE is rarely used. */
850 if (a->ob_size == b->ob_size
851 && (a->ob_sval[0] == b->ob_sval[0]
852 && memcmp(a->ob_sval, b->ob_sval,
853 a->ob_size) == 0)) {
854 result = Py_True;
855 } else {
856 result = Py_False;
857 }
858 goto out;
859 }
860 len_a = a->ob_size; len_b = b->ob_size;
861 min_len = (len_a < len_b) ? len_a : len_b;
862 if (min_len > 0) {
863 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
864 if (c==0)
865 c = memcmp(a->ob_sval, b->ob_sval, min_len);
866 }else
867 c = 0;
868 if (c == 0)
869 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
870 switch (op) {
871 case Py_LT: c = c < 0; break;
872 case Py_LE: c = c <= 0; break;
873 case Py_EQ: assert(0); break; /* unreachable */
874 case Py_NE: c = c != 0; break;
875 case Py_GT: c = c > 0; break;
876 case Py_GE: c = c >= 0; break;
877 default:
878 result = Py_NotImplemented;
879 goto out;
880 }
881 result = c ? Py_True : Py_False;
882 out:
883 Py_INCREF(result);
884 return result;
885}
886
887int
888_PyString_Eq(PyObject *o1, PyObject *o2)
889{
890 PyStringObject *a, *b;
891 a = (PyStringObject*)o1;
892 b = (PyStringObject*)o2;
893 return a->ob_size == b->ob_size
894 && *a->ob_sval == *b->ob_sval
895 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896}
897
Guido van Rossum9bfef441993-03-29 10:43:31 +0000898static long
Fred Drakeba096332000-07-09 07:04:36 +0000899string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000900{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000901 register int len;
902 register unsigned char *p;
903 register long x;
904
905#ifdef CACHE_HASH
906 if (a->ob_shash != -1)
907 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000908#ifdef INTERN_STRINGS
909 if (a->ob_sinterned != NULL)
910 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000912#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000913#endif
914 len = a->ob_size;
915 p = (unsigned char *) a->ob_sval;
916 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000917 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000918 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919 x ^= a->ob_size;
920 if (x == -1)
921 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000922#ifdef CACHE_HASH
923 a->ob_shash = x;
924#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000925 return x;
926}
927
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000928static int
Fred Drakeba096332000-07-09 07:04:36 +0000929string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000930{
931 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000932 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000933 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000934 return -1;
935 }
936 *ptr = (void *)self->ob_sval;
937 return self->ob_size;
938}
939
940static int
Fred Drakeba096332000-07-09 07:04:36 +0000941string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000942{
Guido van Rossum045e6881997-09-08 18:30:11 +0000943 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000944 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000945 return -1;
946}
947
948static int
Fred Drakeba096332000-07-09 07:04:36 +0000949string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000950{
951 if ( lenp )
952 *lenp = self->ob_size;
953 return 1;
954}
955
Guido van Rossum1db70701998-10-08 02:18:52 +0000956static int
Fred Drakeba096332000-07-09 07:04:36 +0000957string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000958{
959 if ( index != 0 ) {
960 PyErr_SetString(PyExc_SystemError,
961 "accessing non-existent string segment");
962 return -1;
963 }
964 *ptr = self->ob_sval;
965 return self->ob_size;
966}
967
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000968static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000969 (inquiry)string_length, /*sq_length*/
970 (binaryfunc)string_concat, /*sq_concat*/
971 (intargfunc)string_repeat, /*sq_repeat*/
972 (intargfunc)string_item, /*sq_item*/
973 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000974 0, /*sq_ass_item*/
975 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000976 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977};
978
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000979static PyBufferProcs string_as_buffer = {
980 (getreadbufferproc)string_buffer_getreadbuf,
981 (getwritebufferproc)string_buffer_getwritebuf,
982 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000983 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000984};
985
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000986
987
988#define LEFTSTRIP 0
989#define RIGHTSTRIP 1
990#define BOTHSTRIP 2
991
992
993static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000994split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000995{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000996 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000997 PyObject* item;
998 PyObject *list = PyList_New(0);
999
1000 if (list == NULL)
1001 return NULL;
1002
Guido van Rossum4c08d552000-03-10 22:55:18 +00001003 for (i = j = 0; i < len; ) {
1004 while (i < len && isspace(Py_CHARMASK(s[i])))
1005 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001007 while (i < len && !isspace(Py_CHARMASK(s[i])))
1008 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001009 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001010 if (maxsplit-- <= 0)
1011 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001012 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1013 if (item == NULL)
1014 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001015 err = PyList_Append(list, item);
1016 Py_DECREF(item);
1017 if (err < 0)
1018 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001019 while (i < len && isspace(Py_CHARMASK(s[i])))
1020 i++;
1021 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001022 }
1023 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001024 if (j < len) {
1025 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1026 if (item == NULL)
1027 goto finally;
1028 err = PyList_Append(list, item);
1029 Py_DECREF(item);
1030 if (err < 0)
1031 goto finally;
1032 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033 return list;
1034 finally:
1035 Py_DECREF(list);
1036 return NULL;
1037}
1038
1039
1040static char split__doc__[] =
1041"S.split([sep [,maxsplit]]) -> list of strings\n\
1042\n\
1043Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001044delimiter string. If maxsplit is given, at most maxsplit\n\
1045splits are done. If sep is not specified, any whitespace string\n\
1046is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001047
1048static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001049string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050{
1051 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001052 int maxsplit = -1;
1053 const char *s = PyString_AS_STRING(self), *sub;
1054 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055
Guido van Rossum4c08d552000-03-10 22:55:18 +00001056 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001057 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001058 if (maxsplit < 0)
1059 maxsplit = INT_MAX;
1060 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001061 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001062 if (PyString_Check(subobj)) {
1063 sub = PyString_AS_STRING(subobj);
1064 n = PyString_GET_SIZE(subobj);
1065 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001066#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001067 else if (PyUnicode_Check(subobj))
1068 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001069#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001070 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1071 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072 if (n == 0) {
1073 PyErr_SetString(PyExc_ValueError, "empty separator");
1074 return NULL;
1075 }
1076
1077 list = PyList_New(0);
1078 if (list == NULL)
1079 return NULL;
1080
1081 i = j = 0;
1082 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001083 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001084 if (maxsplit-- <= 0)
1085 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1087 if (item == NULL)
1088 goto fail;
1089 err = PyList_Append(list, item);
1090 Py_DECREF(item);
1091 if (err < 0)
1092 goto fail;
1093 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001094 }
1095 else
1096 i++;
1097 }
1098 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1099 if (item == NULL)
1100 goto fail;
1101 err = PyList_Append(list, item);
1102 Py_DECREF(item);
1103 if (err < 0)
1104 goto fail;
1105
1106 return list;
1107
1108 fail:
1109 Py_DECREF(list);
1110 return NULL;
1111}
1112
1113
1114static char join__doc__[] =
1115"S.join(sequence) -> string\n\
1116\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001117Return a string which is the concatenation of the strings in the\n\
1118sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001119
1120static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001121string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001122{
1123 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001124 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001125 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126 char *p;
1127 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001128 size_t sz = 0;
1129 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001130 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001131
Tim Peters19fe14e2001-01-19 03:03:47 +00001132 seq = PySequence_Fast(orig, "");
1133 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001134 if (PyErr_ExceptionMatches(PyExc_TypeError))
1135 PyErr_Format(PyExc_TypeError,
1136 "sequence expected, %.80s found",
1137 orig->ob_type->tp_name);
1138 return NULL;
1139 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001140
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001141 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001142 if (seqlen == 0) {
1143 Py_DECREF(seq);
1144 return PyString_FromString("");
1145 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001147 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001148 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1149 PyErr_Format(PyExc_TypeError,
1150 "sequence item 0: expected string,"
1151 " %.80s found",
1152 item->ob_type->tp_name);
1153 Py_DECREF(seq);
1154 return NULL;
1155 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001156 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001157 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001158 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001159 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001160
Tim Peters19fe14e2001-01-19 03:03:47 +00001161 /* There are at least two things to join. Do a pre-pass to figure out
1162 * the total amount of space we'll need (sz), see whether any argument
1163 * is absurd, and defer to the Unicode join if appropriate.
1164 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001165 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001166 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001167 item = PySequence_Fast_GET_ITEM(seq, i);
1168 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001169#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001170 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001171 /* Defer to Unicode join.
1172 * CAUTION: There's no gurantee that the
1173 * original sequence can be iterated over
1174 * again, so we must pass seq here.
1175 */
1176 PyObject *result;
1177 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001178 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001179 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001180 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001181#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001182 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001183 "sequence item %i: expected string,"
1184 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001185 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001186 Py_DECREF(seq);
1187 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001188 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001189 sz += PyString_GET_SIZE(item);
1190 if (i != 0)
1191 sz += seplen;
1192 if (sz < old_sz || sz > INT_MAX) {
1193 PyErr_SetString(PyExc_OverflowError,
1194 "join() is too long for a Python string");
1195 Py_DECREF(seq);
1196 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001197 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001198 }
1199
1200 /* Allocate result space. */
1201 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1202 if (res == NULL) {
1203 Py_DECREF(seq);
1204 return NULL;
1205 }
1206
1207 /* Catenate everything. */
1208 p = PyString_AS_STRING(res);
1209 for (i = 0; i < seqlen; ++i) {
1210 size_t n;
1211 item = PySequence_Fast_GET_ITEM(seq, i);
1212 n = PyString_GET_SIZE(item);
1213 memcpy(p, PyString_AS_STRING(item), n);
1214 p += n;
1215 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001216 memcpy(p, sep, seplen);
1217 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001218 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001220
Jeremy Hylton49048292000-07-11 03:28:17 +00001221 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001222 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001223}
1224
Tim Peters52e155e2001-06-16 05:42:57 +00001225PyObject *
1226_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001227{
Tim Petersa7259592001-06-16 05:11:17 +00001228 assert(sep != NULL && PyString_Check(sep));
1229 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001230 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001231}
1232
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001233static long
Fred Drakeba096332000-07-09 07:04:36 +00001234string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001235{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001236 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001237 int len = PyString_GET_SIZE(self);
1238 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001239 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001240
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001241 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001242 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001243 return -2;
1244 if (PyString_Check(subobj)) {
1245 sub = PyString_AS_STRING(subobj);
1246 n = PyString_GET_SIZE(subobj);
1247 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001248#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001249 else if (PyUnicode_Check(subobj))
1250 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001251#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001252 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001253 return -2;
1254
1255 if (last > len)
1256 last = len;
1257 if (last < 0)
1258 last += len;
1259 if (last < 0)
1260 last = 0;
1261 if (i < 0)
1262 i += len;
1263 if (i < 0)
1264 i = 0;
1265
Guido van Rossum4c08d552000-03-10 22:55:18 +00001266 if (dir > 0) {
1267 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001268 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001269 last -= n;
1270 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001271 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 return (long)i;
1273 }
1274 else {
1275 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001276
Guido van Rossum4c08d552000-03-10 22:55:18 +00001277 if (n == 0 && i <= last)
1278 return (long)last;
1279 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001280 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001281 return (long)j;
1282 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001283
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284 return -1;
1285}
1286
1287
1288static char find__doc__[] =
1289"S.find(sub [,start [,end]]) -> int\n\
1290\n\
1291Return the lowest index in S where substring sub is found,\n\
1292such that sub is contained within s[start,end]. Optional\n\
1293arguments start and end are interpreted as in slice notation.\n\
1294\n\
1295Return -1 on failure.";
1296
1297static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001298string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001301 if (result == -2)
1302 return NULL;
1303 return PyInt_FromLong(result);
1304}
1305
1306
1307static char index__doc__[] =
1308"S.index(sub [,start [,end]]) -> int\n\
1309\n\
1310Like S.find() but raise ValueError when the substring is not found.";
1311
1312static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001313string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001316 if (result == -2)
1317 return NULL;
1318 if (result == -1) {
1319 PyErr_SetString(PyExc_ValueError,
1320 "substring not found in string.index");
1321 return NULL;
1322 }
1323 return PyInt_FromLong(result);
1324}
1325
1326
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327static char rfind__doc__[] =
1328"S.rfind(sub [,start [,end]]) -> int\n\
1329\n\
1330Return the highest index in S where substring sub is found,\n\
1331such that sub is contained within s[start,end]. Optional\n\
1332arguments start and end are interpreted as in slice notation.\n\
1333\n\
1334Return -1 on failure.";
1335
1336static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001337string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001339 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340 if (result == -2)
1341 return NULL;
1342 return PyInt_FromLong(result);
1343}
1344
1345
1346static char rindex__doc__[] =
1347"S.rindex(sub [,start [,end]]) -> int\n\
1348\n\
1349Like S.rfind() but raise ValueError when the substring is not found.";
1350
1351static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001352string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001354 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001355 if (result == -2)
1356 return NULL;
1357 if (result == -1) {
1358 PyErr_SetString(PyExc_ValueError,
1359 "substring not found in string.rindex");
1360 return NULL;
1361 }
1362 return PyInt_FromLong(result);
1363}
1364
1365
1366static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001367do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368{
1369 char *s = PyString_AS_STRING(self);
1370 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372 i = 0;
1373 if (striptype != RIGHTSTRIP) {
1374 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1375 i++;
1376 }
1377 }
1378
1379 j = len;
1380 if (striptype != LEFTSTRIP) {
1381 do {
1382 j--;
1383 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1384 j++;
1385 }
1386
Tim Peters8fa5dd02001-09-12 02:18:30 +00001387 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388 Py_INCREF(self);
1389 return (PyObject*)self;
1390 }
1391 else
1392 return PyString_FromStringAndSize(s+i, j-i);
1393}
1394
1395
1396static char strip__doc__[] =
1397"S.strip() -> string\n\
1398\n\
1399Return a copy of the string S with leading and trailing\n\
1400whitespace removed.";
1401
1402static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001403string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001405 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406}
1407
1408
1409static char lstrip__doc__[] =
1410"S.lstrip() -> string\n\
1411\n\
1412Return a copy of the string S with leading whitespace removed.";
1413
1414static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001415string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001417 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418}
1419
1420
1421static char rstrip__doc__[] =
1422"S.rstrip() -> string\n\
1423\n\
1424Return a copy of the string S with trailing whitespace removed.";
1425
1426static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001427string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001429 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001430}
1431
1432
1433static char lower__doc__[] =
1434"S.lower() -> string\n\
1435\n\
1436Return a copy of the string S converted to lowercase.";
1437
1438static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001439string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440{
1441 char *s = PyString_AS_STRING(self), *s_new;
1442 int i, n = PyString_GET_SIZE(self);
1443 PyObject *new;
1444
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445 new = PyString_FromStringAndSize(NULL, n);
1446 if (new == NULL)
1447 return NULL;
1448 s_new = PyString_AsString(new);
1449 for (i = 0; i < n; i++) {
1450 int c = Py_CHARMASK(*s++);
1451 if (isupper(c)) {
1452 *s_new = tolower(c);
1453 } else
1454 *s_new = c;
1455 s_new++;
1456 }
1457 return new;
1458}
1459
1460
1461static char upper__doc__[] =
1462"S.upper() -> string\n\
1463\n\
1464Return a copy of the string S converted to uppercase.";
1465
1466static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001467string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468{
1469 char *s = PyString_AS_STRING(self), *s_new;
1470 int i, n = PyString_GET_SIZE(self);
1471 PyObject *new;
1472
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001473 new = PyString_FromStringAndSize(NULL, n);
1474 if (new == NULL)
1475 return NULL;
1476 s_new = PyString_AsString(new);
1477 for (i = 0; i < n; i++) {
1478 int c = Py_CHARMASK(*s++);
1479 if (islower(c)) {
1480 *s_new = toupper(c);
1481 } else
1482 *s_new = c;
1483 s_new++;
1484 }
1485 return new;
1486}
1487
1488
Guido van Rossum4c08d552000-03-10 22:55:18 +00001489static char title__doc__[] =
1490"S.title() -> string\n\
1491\n\
1492Return a titlecased version of S, i.e. words start with uppercase\n\
1493characters, all remaining cased characters have lowercase.";
1494
1495static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001496string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001497{
1498 char *s = PyString_AS_STRING(self), *s_new;
1499 int i, n = PyString_GET_SIZE(self);
1500 int previous_is_cased = 0;
1501 PyObject *new;
1502
Guido van Rossum4c08d552000-03-10 22:55:18 +00001503 new = PyString_FromStringAndSize(NULL, n);
1504 if (new == NULL)
1505 return NULL;
1506 s_new = PyString_AsString(new);
1507 for (i = 0; i < n; i++) {
1508 int c = Py_CHARMASK(*s++);
1509 if (islower(c)) {
1510 if (!previous_is_cased)
1511 c = toupper(c);
1512 previous_is_cased = 1;
1513 } else if (isupper(c)) {
1514 if (previous_is_cased)
1515 c = tolower(c);
1516 previous_is_cased = 1;
1517 } else
1518 previous_is_cased = 0;
1519 *s_new++ = c;
1520 }
1521 return new;
1522}
1523
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524static char capitalize__doc__[] =
1525"S.capitalize() -> string\n\
1526\n\
1527Return a copy of the string S with only its first character\n\
1528capitalized.";
1529
1530static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001531string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532{
1533 char *s = PyString_AS_STRING(self), *s_new;
1534 int i, n = PyString_GET_SIZE(self);
1535 PyObject *new;
1536
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001537 new = PyString_FromStringAndSize(NULL, n);
1538 if (new == NULL)
1539 return NULL;
1540 s_new = PyString_AsString(new);
1541 if (0 < n) {
1542 int c = Py_CHARMASK(*s++);
1543 if (islower(c))
1544 *s_new = toupper(c);
1545 else
1546 *s_new = c;
1547 s_new++;
1548 }
1549 for (i = 1; i < n; i++) {
1550 int c = Py_CHARMASK(*s++);
1551 if (isupper(c))
1552 *s_new = tolower(c);
1553 else
1554 *s_new = c;
1555 s_new++;
1556 }
1557 return new;
1558}
1559
1560
1561static char count__doc__[] =
1562"S.count(sub[, start[, end]]) -> int\n\
1563\n\
1564Return the number of occurrences of substring sub in string\n\
1565S[start:end]. Optional arguments start and end are\n\
1566interpreted as in slice notation.";
1567
1568static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001569string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572 int len = PyString_GET_SIZE(self), n;
1573 int i = 0, last = INT_MAX;
1574 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576
Guido van Rossumc6821402000-05-08 14:08:05 +00001577 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1578 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001580
Guido van Rossum4c08d552000-03-10 22:55:18 +00001581 if (PyString_Check(subobj)) {
1582 sub = PyString_AS_STRING(subobj);
1583 n = PyString_GET_SIZE(subobj);
1584 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001585#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001586 else if (PyUnicode_Check(subobj)) {
1587 int count;
1588 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1589 if (count == -1)
1590 return NULL;
1591 else
1592 return PyInt_FromLong((long) count);
1593 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001594#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001595 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1596 return NULL;
1597
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598 if (last > len)
1599 last = len;
1600 if (last < 0)
1601 last += len;
1602 if (last < 0)
1603 last = 0;
1604 if (i < 0)
1605 i += len;
1606 if (i < 0)
1607 i = 0;
1608 m = last + 1 - n;
1609 if (n == 0)
1610 return PyInt_FromLong((long) (m-i));
1611
1612 r = 0;
1613 while (i < m) {
1614 if (!memcmp(s+i, sub, n)) {
1615 r++;
1616 i += n;
1617 } else {
1618 i++;
1619 }
1620 }
1621 return PyInt_FromLong((long) r);
1622}
1623
1624
1625static char swapcase__doc__[] =
1626"S.swapcase() -> string\n\
1627\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001628Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629converted to lowercase and vice versa.";
1630
1631static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001632string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633{
1634 char *s = PyString_AS_STRING(self), *s_new;
1635 int i, n = PyString_GET_SIZE(self);
1636 PyObject *new;
1637
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 new = PyString_FromStringAndSize(NULL, n);
1639 if (new == NULL)
1640 return NULL;
1641 s_new = PyString_AsString(new);
1642 for (i = 0; i < n; i++) {
1643 int c = Py_CHARMASK(*s++);
1644 if (islower(c)) {
1645 *s_new = toupper(c);
1646 }
1647 else if (isupper(c)) {
1648 *s_new = tolower(c);
1649 }
1650 else
1651 *s_new = c;
1652 s_new++;
1653 }
1654 return new;
1655}
1656
1657
1658static char translate__doc__[] =
1659"S.translate(table [,deletechars]) -> string\n\
1660\n\
1661Return a copy of the string S, where all characters occurring\n\
1662in the optional argument deletechars are removed, and the\n\
1663remaining characters have been mapped through the given\n\
1664translation table, which must be a string of length 256.";
1665
1666static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001667string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001669 register char *input, *output;
1670 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671 register int i, c, changed = 0;
1672 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674 int inlen, tablen, dellen = 0;
1675 PyObject *result;
1676 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 if (!PyArg_ParseTuple(args, "O|O:translate",
1680 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001681 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001682
1683 if (PyString_Check(tableobj)) {
1684 table1 = PyString_AS_STRING(tableobj);
1685 tablen = PyString_GET_SIZE(tableobj);
1686 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001687#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001689 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001690 parameter; instead a mapping to None will cause characters
1691 to be deleted. */
1692 if (delobj != NULL) {
1693 PyErr_SetString(PyExc_TypeError,
1694 "deletions are implemented differently for unicode");
1695 return NULL;
1696 }
1697 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1698 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001699#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001700 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001701 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001702
1703 if (delobj != NULL) {
1704 if (PyString_Check(delobj)) {
1705 del_table = PyString_AS_STRING(delobj);
1706 dellen = PyString_GET_SIZE(delobj);
1707 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001708#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001709 else if (PyUnicode_Check(delobj)) {
1710 PyErr_SetString(PyExc_TypeError,
1711 "deletions are implemented differently for unicode");
1712 return NULL;
1713 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001714#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001715 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1716 return NULL;
1717
1718 if (tablen != 256) {
1719 PyErr_SetString(PyExc_ValueError,
1720 "translation table must be 256 characters long");
1721 return NULL;
1722 }
1723 }
1724 else {
1725 del_table = NULL;
1726 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 }
1728
1729 table = table1;
1730 inlen = PyString_Size(input_obj);
1731 result = PyString_FromStringAndSize((char *)NULL, inlen);
1732 if (result == NULL)
1733 return NULL;
1734 output_start = output = PyString_AsString(result);
1735 input = PyString_AsString(input_obj);
1736
1737 if (dellen == 0) {
1738 /* If no deletions are required, use faster code */
1739 for (i = inlen; --i >= 0; ) {
1740 c = Py_CHARMASK(*input++);
1741 if (Py_CHARMASK((*output++ = table[c])) != c)
1742 changed = 1;
1743 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001744 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745 return result;
1746 Py_DECREF(result);
1747 Py_INCREF(input_obj);
1748 return input_obj;
1749 }
1750
1751 for (i = 0; i < 256; i++)
1752 trans_table[i] = Py_CHARMASK(table[i]);
1753
1754 for (i = 0; i < dellen; i++)
1755 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1756
1757 for (i = inlen; --i >= 0; ) {
1758 c = Py_CHARMASK(*input++);
1759 if (trans_table[c] != -1)
1760 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1761 continue;
1762 changed = 1;
1763 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001764 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 Py_DECREF(result);
1766 Py_INCREF(input_obj);
1767 return input_obj;
1768 }
1769 /* Fix the size of the resulting string */
1770 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1771 return NULL;
1772 return result;
1773}
1774
1775
1776/* What follows is used for implementing replace(). Perry Stoll. */
1777
/*
   mymemfind

   strstr replacement for arbitrary blocks of memory.

   Searches the LEN bytes at MEM for the first occurrence of the
   PAT_LEN bytes at PAT. Returns the index into MEM of that occurrence,
   or -1 if there is none. If PAT is longer than MEM the result is
   always -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest index at which PAT still fits entirely inside MEM;
       negative when PAT is longer than MEM. */
    const int last_start = len - pat_len;
    register int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1803
/*
   mymemcnt

   Return the number of non-overlapping occurrences of PAT in MEM,
   scanning left to right and resuming just past each match.
   So mem=1111 and pat==11 returns 2, and
   mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int matches = 0;

    for (; len >= 0; matches++) {
        register int hit = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (hit == -1)
            break;
        /* Skip the unmatched prefix and the match itself. */
        consumed = hit + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return matches;
}
1827
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB. If COUNT is negative every occurrence is
   replaced, otherwise at most COUNT occurrences are.

   If STR is empty, PAT is longer than STR, or there is nothing to
   replace (no occurrences of PAT in STR, or COUNT is 0), then the
   original string is returned. Otherwise, a new string is allocated
   here with PyMem_MALLOC and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the output length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by delta. Refuse when the
       total would overflow an int: a wrapped new_len would make the
       buffer allocated below too small for the copies that follow. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1912
1913
1914static char replace__doc__[] =
1915"S.replace (old, new[, maxsplit]) -> string\n\
1916\n\
1917Return a copy of string S with all occurrences of substring\n\
1918old replaced by new. If the optional argument maxsplit is\n\
1919given, only the first maxsplit occurrences are replaced.";
1920
1921static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001922string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001924 const char *str = PyString_AS_STRING(self), *sub, *repl;
1925 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001926 const int len = PyString_GET_SIZE(self);
1927 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001930 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931
Guido van Rossum4c08d552000-03-10 22:55:18 +00001932 if (!PyArg_ParseTuple(args, "OO|i:replace",
1933 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001935
1936 if (PyString_Check(subobj)) {
1937 sub = PyString_AS_STRING(subobj);
1938 sub_len = PyString_GET_SIZE(subobj);
1939 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001940#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001941 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001942 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001944#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001945 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1946 return NULL;
1947
1948 if (PyString_Check(replobj)) {
1949 repl = PyString_AS_STRING(replobj);
1950 repl_len = PyString_GET_SIZE(replobj);
1951 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001952#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001953 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001954 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001955 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001956#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001957 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1958 return NULL;
1959
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001960 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001961 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962 return NULL;
1963 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001964 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965 if (new_s == NULL) {
1966 PyErr_NoMemory();
1967 return NULL;
1968 }
1969 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001970 if (PyString_CheckExact(self)) {
1971 /* we're returning another reference to self */
1972 new = (PyObject*)self;
1973 Py_INCREF(new);
1974 }
1975 else {
1976 new = PyString_FromStringAndSize(str, len);
1977 if (new == NULL)
1978 return NULL;
1979 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980 }
1981 else {
1982 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001983 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984 }
1985 return new;
1986}
1987
1988
1989static char startswith__doc__[] =
1990"S.startswith(prefix[, start[, end]]) -> int\n\
1991\n\
1992Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1993optional start, test S beginning at that position. With optional end, stop\n\
1994comparing S at that position.";
1995
1996static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001997string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001999 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002001 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002 int plen;
2003 int start = 0;
2004 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002005 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006
Guido van Rossumc6821402000-05-08 14:08:05 +00002007 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2008 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002009 return NULL;
2010 if (PyString_Check(subobj)) {
2011 prefix = PyString_AS_STRING(subobj);
2012 plen = PyString_GET_SIZE(subobj);
2013 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002014#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002015 else if (PyUnicode_Check(subobj)) {
2016 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002017 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002018 subobj, start, end, -1);
2019 if (rc == -1)
2020 return NULL;
2021 else
2022 return PyInt_FromLong((long) rc);
2023 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002024#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002025 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026 return NULL;
2027
2028 /* adopt Java semantics for index out of range. it is legal for
2029 * offset to be == plen, but this only returns true if prefix is
2030 * the empty string.
2031 */
2032 if (start < 0 || start+plen > len)
2033 return PyInt_FromLong(0);
2034
2035 if (!memcmp(str+start, prefix, plen)) {
2036 /* did the match end after the specified end? */
2037 if (end < 0)
2038 return PyInt_FromLong(1);
2039 else if (end - start < plen)
2040 return PyInt_FromLong(0);
2041 else
2042 return PyInt_FromLong(1);
2043 }
2044 else return PyInt_FromLong(0);
2045}
2046
2047
2048static char endswith__doc__[] =
2049"S.endswith(suffix[, start[, end]]) -> int\n\
2050\n\
2051Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2052optional start, test S beginning at that position. With optional end, stop\n\
2053comparing S at that position.";
2054
2055static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002056string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002058 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002059 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 const char* suffix;
2061 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062 int start = 0;
2063 int end = -1;
2064 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066
Guido van Rossumc6821402000-05-08 14:08:05 +00002067 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2068 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069 return NULL;
2070 if (PyString_Check(subobj)) {
2071 suffix = PyString_AS_STRING(subobj);
2072 slen = PyString_GET_SIZE(subobj);
2073 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002074#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002075 else if (PyUnicode_Check(subobj)) {
2076 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002077 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002078 subobj, start, end, +1);
2079 if (rc == -1)
2080 return NULL;
2081 else
2082 return PyInt_FromLong((long) rc);
2083 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002084#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086 return NULL;
2087
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089 return PyInt_FromLong(0);
2090
2091 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095 return PyInt_FromLong(1);
2096 else return PyInt_FromLong(0);
2097}
2098
2099
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002100static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002101"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002102\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002103Encodes S using the codec registered for encoding. encoding defaults\n\
2104to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002105handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2106a ValueError. Other possible values are 'ignore' and 'replace'.";
2107
2108static PyObject *
2109string_encode(PyStringObject *self, PyObject *args)
2110{
2111 char *encoding = NULL;
2112 char *errors = NULL;
2113 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2114 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002115 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2116}
2117
2118
2119static char decode__doc__[] =
2120"S.decode([encoding[,errors]]) -> object\n\
2121\n\
2122Decodes S using the codec registered for encoding. encoding defaults\n\
2123to the default encoding. errors may be given to set a different error\n\
2124handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2125a ValueError. Other possible values are 'ignore' and 'replace'.";
2126
2127static PyObject *
2128string_decode(PyStringObject *self, PyObject *args)
2129{
2130 char *encoding = NULL;
2131 char *errors = NULL;
2132 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2133 return NULL;
2134 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002135}
2136
2137
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138static char expandtabs__doc__[] =
2139"S.expandtabs([tabsize]) -> string\n\
2140\n\
2141Return a copy of S where all tab characters are expanded using spaces.\n\
2142If tabsize is not given, a tab size of 8 characters is assumed.";
2143
2144static PyObject*
2145string_expandtabs(PyStringObject *self, PyObject *args)
2146{
2147 const char *e, *p;
2148 char *q;
2149 int i, j;
2150 PyObject *u;
2151 int tabsize = 8;
2152
2153 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2154 return NULL;
2155
Thomas Wouters7e474022000-07-16 12:04:32 +00002156 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002157 i = j = 0;
2158 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2159 for (p = PyString_AS_STRING(self); p < e; p++)
2160 if (*p == '\t') {
2161 if (tabsize > 0)
2162 j += tabsize - (j % tabsize);
2163 }
2164 else {
2165 j++;
2166 if (*p == '\n' || *p == '\r') {
2167 i += j;
2168 j = 0;
2169 }
2170 }
2171
2172 /* Second pass: create output string and fill it */
2173 u = PyString_FromStringAndSize(NULL, i + j);
2174 if (!u)
2175 return NULL;
2176
2177 j = 0;
2178 q = PyString_AS_STRING(u);
2179
2180 for (p = PyString_AS_STRING(self); p < e; p++)
2181 if (*p == '\t') {
2182 if (tabsize > 0) {
2183 i = tabsize - (j % tabsize);
2184 j += i;
2185 while (i--)
2186 *q++ = ' ';
2187 }
2188 }
2189 else {
2190 j++;
2191 *q++ = *p;
2192 if (*p == '\n' || *p == '\r')
2193 j = 0;
2194 }
2195
2196 return u;
2197}
2198
Tim Peters8fa5dd02001-09-12 02:18:30 +00002199static PyObject *
2200pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201{
2202 PyObject *u;
2203
2204 if (left < 0)
2205 left = 0;
2206 if (right < 0)
2207 right = 0;
2208
Tim Peters8fa5dd02001-09-12 02:18:30 +00002209 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210 Py_INCREF(self);
2211 return (PyObject *)self;
2212 }
2213
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002214 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002215 left + PyString_GET_SIZE(self) + right);
2216 if (u) {
2217 if (left)
2218 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002219 memcpy(PyString_AS_STRING(u) + left,
2220 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221 PyString_GET_SIZE(self));
2222 if (right)
2223 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2224 fill, right);
2225 }
2226
2227 return u;
2228}
2229
2230static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002231"S.ljust(width) -> string\n"
2232"\n"
2233"Return S left justified in a string of length width. Padding is\n"
2234"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002235
2236static PyObject *
2237string_ljust(PyStringObject *self, PyObject *args)
2238{
2239 int width;
2240 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2241 return NULL;
2242
Tim Peters8fa5dd02001-09-12 02:18:30 +00002243 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002244 Py_INCREF(self);
2245 return (PyObject*) self;
2246 }
2247
2248 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2249}
2250
2251
2252static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002253"S.rjust(width) -> string\n"
2254"\n"
2255"Return S right justified in a string of length width. Padding is\n"
2256"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257
2258static PyObject *
2259string_rjust(PyStringObject *self, PyObject *args)
2260{
2261 int width;
2262 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2263 return NULL;
2264
Tim Peters8fa5dd02001-09-12 02:18:30 +00002265 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266 Py_INCREF(self);
2267 return (PyObject*) self;
2268 }
2269
2270 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2271}
2272
2273
2274static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002275"S.center(width) -> string\n"
2276"\n"
2277"Return S centered in a string of length width. Padding is done\n"
2278"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279
2280static PyObject *
2281string_center(PyStringObject *self, PyObject *args)
2282{
2283 int marg, left;
2284 int width;
2285
2286 if (!PyArg_ParseTuple(args, "i:center", &width))
2287 return NULL;
2288
Tim Peters8fa5dd02001-09-12 02:18:30 +00002289 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002290 Py_INCREF(self);
2291 return (PyObject*) self;
2292 }
2293
2294 marg = width - PyString_GET_SIZE(self);
2295 left = marg / 2 + (marg & width & 1);
2296
2297 return pad(self, left, marg - left, ' ');
2298}
2299
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002301"S.isspace() -> int\n"
2302"\n"
2303"Return 1 if there are only whitespace characters in S,\n"
2304"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305
2306static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002307string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308{
Fred Drakeba096332000-07-09 07:04:36 +00002309 register const unsigned char *p
2310 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002311 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312
Guido van Rossum4c08d552000-03-10 22:55:18 +00002313 /* Shortcut for single character strings */
2314 if (PyString_GET_SIZE(self) == 1 &&
2315 isspace(*p))
2316 return PyInt_FromLong(1);
2317
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002318 /* Special case for empty strings */
2319 if (PyString_GET_SIZE(self) == 0)
2320 return PyInt_FromLong(0);
2321
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322 e = p + PyString_GET_SIZE(self);
2323 for (; p < e; p++) {
2324 if (!isspace(*p))
2325 return PyInt_FromLong(0);
2326 }
2327 return PyInt_FromLong(1);
2328}
2329
2330
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002331static char isalpha__doc__[] =
2332"S.isalpha() -> int\n\
2333\n\
2334Return 1 if all characters in S are alphabetic\n\
2335and there is at least one character in S, 0 otherwise.";
2336
2337static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002338string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002339{
Fred Drakeba096332000-07-09 07:04:36 +00002340 register const unsigned char *p
2341 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002342 register const unsigned char *e;
2343
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002344 /* Shortcut for single character strings */
2345 if (PyString_GET_SIZE(self) == 1 &&
2346 isalpha(*p))
2347 return PyInt_FromLong(1);
2348
2349 /* Special case for empty strings */
2350 if (PyString_GET_SIZE(self) == 0)
2351 return PyInt_FromLong(0);
2352
2353 e = p + PyString_GET_SIZE(self);
2354 for (; p < e; p++) {
2355 if (!isalpha(*p))
2356 return PyInt_FromLong(0);
2357 }
2358 return PyInt_FromLong(1);
2359}
2360
2361
2362static char isalnum__doc__[] =
2363"S.isalnum() -> int\n\
2364\n\
2365Return 1 if all characters in S are alphanumeric\n\
2366and there is at least one character in S, 0 otherwise.";
2367
2368static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002369string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002370{
Fred Drakeba096332000-07-09 07:04:36 +00002371 register const unsigned char *p
2372 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002373 register const unsigned char *e;
2374
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002375 /* Shortcut for single character strings */
2376 if (PyString_GET_SIZE(self) == 1 &&
2377 isalnum(*p))
2378 return PyInt_FromLong(1);
2379
2380 /* Special case for empty strings */
2381 if (PyString_GET_SIZE(self) == 0)
2382 return PyInt_FromLong(0);
2383
2384 e = p + PyString_GET_SIZE(self);
2385 for (; p < e; p++) {
2386 if (!isalnum(*p))
2387 return PyInt_FromLong(0);
2388 }
2389 return PyInt_FromLong(1);
2390}
2391
2392
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393static char isdigit__doc__[] =
2394"S.isdigit() -> int\n\
2395\n\
2396Return 1 if there are only digit characters in S,\n\
23970 otherwise.";
2398
2399static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002400string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401{
Fred Drakeba096332000-07-09 07:04:36 +00002402 register const unsigned char *p
2403 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002404 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002405
Guido van Rossum4c08d552000-03-10 22:55:18 +00002406 /* Shortcut for single character strings */
2407 if (PyString_GET_SIZE(self) == 1 &&
2408 isdigit(*p))
2409 return PyInt_FromLong(1);
2410
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002411 /* Special case for empty strings */
2412 if (PyString_GET_SIZE(self) == 0)
2413 return PyInt_FromLong(0);
2414
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 e = p + PyString_GET_SIZE(self);
2416 for (; p < e; p++) {
2417 if (!isdigit(*p))
2418 return PyInt_FromLong(0);
2419 }
2420 return PyInt_FromLong(1);
2421}
2422
2423
2424static char islower__doc__[] =
2425"S.islower() -> int\n\
2426\n\
2427Return 1 if all cased characters in S are lowercase and there is\n\
2428at least one cased character in S, 0 otherwise.";
2429
2430static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002431string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432{
Fred Drakeba096332000-07-09 07:04:36 +00002433 register const unsigned char *p
2434 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002435 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 int cased;
2437
Guido van Rossum4c08d552000-03-10 22:55:18 +00002438 /* Shortcut for single character strings */
2439 if (PyString_GET_SIZE(self) == 1)
2440 return PyInt_FromLong(islower(*p) != 0);
2441
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002442 /* Special case for empty strings */
2443 if (PyString_GET_SIZE(self) == 0)
2444 return PyInt_FromLong(0);
2445
Guido van Rossum4c08d552000-03-10 22:55:18 +00002446 e = p + PyString_GET_SIZE(self);
2447 cased = 0;
2448 for (; p < e; p++) {
2449 if (isupper(*p))
2450 return PyInt_FromLong(0);
2451 else if (!cased && islower(*p))
2452 cased = 1;
2453 }
2454 return PyInt_FromLong(cased);
2455}
2456
2457
2458static char isupper__doc__[] =
2459"S.isupper() -> int\n\
2460\n\
2461Return 1 if all cased characters in S are uppercase and there is\n\
2462at least one cased character in S, 0 otherwise.";
2463
2464static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002465string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002466{
Fred Drakeba096332000-07-09 07:04:36 +00002467 register const unsigned char *p
2468 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002469 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470 int cased;
2471
Guido van Rossum4c08d552000-03-10 22:55:18 +00002472 /* Shortcut for single character strings */
2473 if (PyString_GET_SIZE(self) == 1)
2474 return PyInt_FromLong(isupper(*p) != 0);
2475
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002476 /* Special case for empty strings */
2477 if (PyString_GET_SIZE(self) == 0)
2478 return PyInt_FromLong(0);
2479
Guido van Rossum4c08d552000-03-10 22:55:18 +00002480 e = p + PyString_GET_SIZE(self);
2481 cased = 0;
2482 for (; p < e; p++) {
2483 if (islower(*p))
2484 return PyInt_FromLong(0);
2485 else if (!cased && isupper(*p))
2486 cased = 1;
2487 }
2488 return PyInt_FromLong(cased);
2489}
2490
2491
2492static char istitle__doc__[] =
2493"S.istitle() -> int\n\
2494\n\
2495Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2496may only follow uncased characters and lowercase characters only cased\n\
2497ones. Return 0 otherwise.";
2498
2499static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002500string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002501{
Fred Drakeba096332000-07-09 07:04:36 +00002502 register const unsigned char *p
2503 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002504 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002505 int cased, previous_is_cased;
2506
Guido van Rossum4c08d552000-03-10 22:55:18 +00002507 /* Shortcut for single character strings */
2508 if (PyString_GET_SIZE(self) == 1)
2509 return PyInt_FromLong(isupper(*p) != 0);
2510
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002511 /* Special case for empty strings */
2512 if (PyString_GET_SIZE(self) == 0)
2513 return PyInt_FromLong(0);
2514
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 e = p + PyString_GET_SIZE(self);
2516 cased = 0;
2517 previous_is_cased = 0;
2518 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002519 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520
2521 if (isupper(ch)) {
2522 if (previous_is_cased)
2523 return PyInt_FromLong(0);
2524 previous_is_cased = 1;
2525 cased = 1;
2526 }
2527 else if (islower(ch)) {
2528 if (!previous_is_cased)
2529 return PyInt_FromLong(0);
2530 previous_is_cased = 1;
2531 cased = 1;
2532 }
2533 else
2534 previous_is_cased = 0;
2535 }
2536 return PyInt_FromLong(cased);
2537}
2538
2539
2540static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002541"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542\n\
2543Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002544Line breaks are not included in the resulting list unless keepends\n\
2545is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546
2547#define SPLIT_APPEND(data, left, right) \
2548 str = PyString_FromStringAndSize(data + left, right - left); \
2549 if (!str) \
2550 goto onError; \
2551 if (PyList_Append(list, str)) { \
2552 Py_DECREF(str); \
2553 goto onError; \
2554 } \
2555 else \
2556 Py_DECREF(str);
2557
2558static PyObject*
2559string_splitlines(PyStringObject *self, PyObject *args)
2560{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 register int i;
2562 register int j;
2563 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002564 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565 PyObject *list;
2566 PyObject *str;
2567 char *data;
2568
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002569 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002570 return NULL;
2571
2572 data = PyString_AS_STRING(self);
2573 len = PyString_GET_SIZE(self);
2574
Guido van Rossum4c08d552000-03-10 22:55:18 +00002575 list = PyList_New(0);
2576 if (!list)
2577 goto onError;
2578
2579 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002580 int eol;
2581
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582 /* Find a line and append it */
2583 while (i < len && data[i] != '\n' && data[i] != '\r')
2584 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002585
2586 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002587 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002588 if (i < len) {
2589 if (data[i] == '\r' && i + 1 < len &&
2590 data[i+1] == '\n')
2591 i += 2;
2592 else
2593 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002594 if (keepends)
2595 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002596 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002597 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002598 j = i;
2599 }
2600 if (j < len) {
2601 SPLIT_APPEND(data, j, len);
2602 }
2603
2604 return list;
2605
2606 onError:
2607 Py_DECREF(list);
2608 return NULL;
2609}
2610
2611#undef SPLIT_APPEND
2612
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002613
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002614static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002615string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002616 /* Counterparts of the obsolete stropmodule functions; except
2617 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002618 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2619 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2620 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2621 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2622 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2623 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2624 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2625 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2626 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2627 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2628 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2629 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2630 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2631 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2632 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2633 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2634 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2635 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2636 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2637 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2638 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2639 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2640 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2641 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2642 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2643 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2644 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2645 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2646 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2647 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2648 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2649 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2650 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002651#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002652 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002653#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002654 {NULL, NULL} /* sentinel */
2655};
2656
Guido van Rossumae960af2001-08-30 03:11:59 +00002657staticforward PyObject *
2658str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2659
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002660static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002661string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002662{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002663 PyObject *x = NULL;
2664 static char *kwlist[] = {"object", 0};
2665
Guido van Rossumae960af2001-08-30 03:11:59 +00002666 if (type != &PyString_Type)
2667 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002668 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2669 return NULL;
2670 if (x == NULL)
2671 return PyString_FromString("");
2672 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002673}
2674
Guido van Rossumae960af2001-08-30 03:11:59 +00002675static PyObject *
2676str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2677{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002678 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002679 int n;
2680
2681 assert(PyType_IsSubtype(type, &PyString_Type));
2682 tmp = string_new(&PyString_Type, args, kwds);
2683 if (tmp == NULL)
2684 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002685 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002686 n = PyString_GET_SIZE(tmp);
2687 pnew = type->tp_alloc(type, n);
2688 if (pnew != NULL) {
2689 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2690#ifdef CACHE_HASH
2691 ((PyStringObject *)pnew)->ob_shash =
2692 ((PyStringObject *)tmp)->ob_shash;
2693#endif
2694#ifdef INTERN_STRINGS
2695 ((PyStringObject *)pnew)->ob_sinterned =
2696 ((PyStringObject *)tmp)->ob_sinterned;
2697#endif
2698 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002699 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002700 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002701}
2702
Tim Peters6d6c1a32001-08-02 04:15:00 +00002703static char string_doc[] =
2704"str(object) -> string\n\
2705\n\
2706Return a nice string representation of the object.\n\
2707If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002708
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002709PyTypeObject PyString_Type = {
2710 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002711 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002712 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002713 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002714 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002715 (destructor)string_dealloc, /* tp_dealloc */
2716 (printfunc)string_print, /* tp_print */
2717 0, /* tp_getattr */
2718 0, /* tp_setattr */
2719 0, /* tp_compare */
2720 (reprfunc)string_repr, /* tp_repr */
2721 0, /* tp_as_number */
2722 &string_as_sequence, /* tp_as_sequence */
2723 0, /* tp_as_mapping */
2724 (hashfunc)string_hash, /* tp_hash */
2725 0, /* tp_call */
2726 (reprfunc)string_str, /* tp_str */
2727 PyObject_GenericGetAttr, /* tp_getattro */
2728 0, /* tp_setattro */
2729 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002730 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002731 string_doc, /* tp_doc */
2732 0, /* tp_traverse */
2733 0, /* tp_clear */
2734 (richcmpfunc)string_richcompare, /* tp_richcompare */
2735 0, /* tp_weaklistoffset */
2736 0, /* tp_iter */
2737 0, /* tp_iternext */
2738 string_methods, /* tp_methods */
2739 0, /* tp_members */
2740 0, /* tp_getset */
2741 0, /* tp_base */
2742 0, /* tp_dict */
2743 0, /* tp_descr_get */
2744 0, /* tp_descr_set */
2745 0, /* tp_dictoffset */
2746 0, /* tp_init */
2747 0, /* tp_alloc */
2748 string_new, /* tp_new */
Guido van Rossum9475a232001-10-05 20:51:39 +00002749 _PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002750};
2751
2752void
Fred Drakeba096332000-07-09 07:04:36 +00002753PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002754{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002755 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002756 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002757 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002758 if (w == NULL || !PyString_Check(*pv)) {
2759 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002760 *pv = NULL;
2761 return;
2762 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002763 v = string_concat((PyStringObject *) *pv, w);
2764 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002765 *pv = v;
2766}
2767
Guido van Rossum013142a1994-08-30 08:19:36 +00002768void
Fred Drakeba096332000-07-09 07:04:36 +00002769PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002770{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002771 PyString_Concat(pv, w);
2772 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002773}
2774
2775
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002776/* The following function breaks the notion that strings are immutable:
2777 it changes the size of a string. We get away with this only if there
2778 is only one module referencing the object. You can also think of it
2779 as creating a new string object and destroying the old one, only
2780 more efficiently. In any case, don't use this if the string may
2781 already be known to some other part of the code... */
2782
2783int
Fred Drakeba096332000-07-09 07:04:36 +00002784_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002785{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002786 register PyObject *v;
2787 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002788 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002789 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002790 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002791 Py_DECREF(v);
2792 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002793 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002794 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002795 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002796#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002797 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002798#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002799 _Py_ForgetReference(v);
2800 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002801 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002802 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002804 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002805 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002806 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002808 _Py_NewReference(*pv);
2809 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002810 sv->ob_size = newsize;
2811 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002812 return 0;
2813}
Guido van Rossume5372401993-03-16 12:15:04 +00002814
2815/* Helpers for formatstring */
2816
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002817static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002818getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002819{
2820 int argidx = *p_argidx;
2821 if (argidx < arglen) {
2822 (*p_argidx)++;
2823 if (arglen < 0)
2824 return args;
2825 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002826 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002827 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002828 PyErr_SetString(PyExc_TypeError,
2829 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002830 return NULL;
2831}
2832
/* Bit flags recording which flag characters appeared in a format
 * specifier; they are combined with bitwise OR.
 *
 *   flag      character
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2845
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002846static int
Fred Drakeba096332000-07-09 07:04:36 +00002847formatfloat(char *buf, size_t buflen, int flags,
2848 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002849{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002850 /* fmt = '%#.' + `prec` + `type`
2851 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002852 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002853 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002854 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002855 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002856 if (prec < 0)
2857 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002858 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2859 type = 'g';
2860 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002861 /* worst case length calc to ensure no buffer overrun:
2862 fmt = %#.<prec>g
2863 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002864 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002865 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2866 If prec=0 the effective precision is 1 (the leading digit is
2867 always given), therefore increase by one to 10+prec. */
2868 if (buflen <= (size_t)10 + (size_t)prec) {
2869 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002870 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002871 return -1;
2872 }
Guido van Rossume5372401993-03-16 12:15:04 +00002873 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002874 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002875}
2876
Tim Peters38fd5b62000-09-21 05:43:11 +00002877/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2878 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2879 * Python's regular ints.
2880 * Return value: a new PyString*, or NULL if error.
2881 * . *pbuf is set to point into it,
2882 * *plen set to the # of chars following that.
2883 * Caller must decref it when done using pbuf.
2884 * The string starting at *pbuf is of the form
2885 * "-"? ("0x" | "0X")? digit+
2886 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002887 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002888 * There will be at least prec digits, zero-filled on the left if
2889 * necessary to get that many.
2890 * val object to be converted
2891 * flags bitmask of format flags; only F_ALT is looked at
2892 * prec minimum number of digits; 0-fill on left if needed
2893 * type a character in [duoxX]; u acts the same as d
2894 *
2895 * CAUTION: o, x and X conversions on regular ints can never
2896 * produce a '-' sign, but can for Python's unbounded ints.
2897 */
2898PyObject*
2899_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2900 char **pbuf, int *plen)
2901{
2902 PyObject *result = NULL;
2903 char *buf;
2904 int i;
2905 int sign; /* 1 if '-', else 0 */
2906 int len; /* number of characters */
2907 int numdigits; /* len == numnondigits + numdigits */
2908 int numnondigits = 0;
2909
2910 switch (type) {
2911 case 'd':
2912 case 'u':
2913 result = val->ob_type->tp_str(val);
2914 break;
2915 case 'o':
2916 result = val->ob_type->tp_as_number->nb_oct(val);
2917 break;
2918 case 'x':
2919 case 'X':
2920 numnondigits = 2;
2921 result = val->ob_type->tp_as_number->nb_hex(val);
2922 break;
2923 default:
2924 assert(!"'type' not in [duoxX]");
2925 }
2926 if (!result)
2927 return NULL;
2928
2929 /* To modify the string in-place, there can only be one reference. */
2930 if (result->ob_refcnt != 1) {
2931 PyErr_BadInternalCall();
2932 return NULL;
2933 }
2934 buf = PyString_AsString(result);
2935 len = PyString_Size(result);
2936 if (buf[len-1] == 'L') {
2937 --len;
2938 buf[len] = '\0';
2939 }
2940 sign = buf[0] == '-';
2941 numnondigits += sign;
2942 numdigits = len - numnondigits;
2943 assert(numdigits > 0);
2944
Tim Petersfff53252001-04-12 18:38:48 +00002945 /* Get rid of base marker unless F_ALT */
2946 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002947 /* Need to skip 0x, 0X or 0. */
2948 int skipped = 0;
2949 switch (type) {
2950 case 'o':
2951 assert(buf[sign] == '0');
2952 /* If 0 is only digit, leave it alone. */
2953 if (numdigits > 1) {
2954 skipped = 1;
2955 --numdigits;
2956 }
2957 break;
2958 case 'x':
2959 case 'X':
2960 assert(buf[sign] == '0');
2961 assert(buf[sign + 1] == 'x');
2962 skipped = 2;
2963 numnondigits -= 2;
2964 break;
2965 }
2966 if (skipped) {
2967 buf += skipped;
2968 len -= skipped;
2969 if (sign)
2970 buf[0] = '-';
2971 }
2972 assert(len == numnondigits + numdigits);
2973 assert(numdigits > 0);
2974 }
2975
2976 /* Fill with leading zeroes to meet minimum width. */
2977 if (prec > numdigits) {
2978 PyObject *r1 = PyString_FromStringAndSize(NULL,
2979 numnondigits + prec);
2980 char *b1;
2981 if (!r1) {
2982 Py_DECREF(result);
2983 return NULL;
2984 }
2985 b1 = PyString_AS_STRING(r1);
2986 for (i = 0; i < numnondigits; ++i)
2987 *b1++ = *buf++;
2988 for (i = 0; i < prec - numdigits; i++)
2989 *b1++ = '0';
2990 for (i = 0; i < numdigits; i++)
2991 *b1++ = *buf++;
2992 *b1 = '\0';
2993 Py_DECREF(result);
2994 result = r1;
2995 buf = PyString_AS_STRING(result);
2996 len = numnondigits + prec;
2997 }
2998
2999 /* Fix up case for hex conversions. */
3000 switch (type) {
3001 case 'x':
3002 /* Need to convert all upper case letters to lower case. */
3003 for (i = 0; i < len; i++)
3004 if (buf[i] >= 'A' && buf[i] <= 'F')
3005 buf[i] += 'a'-'A';
3006 break;
3007 case 'X':
3008 /* Need to convert 0x to 0X (and -0x to -0X). */
3009 if (buf[sign + 1] == 'x')
3010 buf[sign + 1] = 'X';
3011 break;
3012 }
3013 *pbuf = buf;
3014 *plen = len;
3015 return result;
3016}
3017
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003018static int
Fred Drakeba096332000-07-09 07:04:36 +00003019formatint(char *buf, size_t buflen, int flags,
3020 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003021{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003022 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003023 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3024 + 1 + 1 = 24 */
3025 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003026 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003027 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003028 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003029 if (prec < 0)
3030 prec = 1;
3031 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003032 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003033 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003034 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003035 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003036 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003037 return -1;
3038 }
Guido van Rossume5372401993-03-16 12:15:04 +00003039 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003040 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3041 * but we want it (for consistency with other %#x conversions, and
3042 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003043 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3044 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3045 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003046 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003047 if (x == 0 &&
3048 (flags & F_ALT) &&
3049 (type == 'x' || type == 'X') &&
3050 buf[1] != (char)type) /* this last always true under std C */
3051 {
Tim Petersfff53252001-04-12 18:38:48 +00003052 memmove(buf+2, buf, strlen(buf) + 1);
3053 buf[0] = '0';
3054 buf[1] = (char)type;
3055 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003056 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003057}
3058
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003059static int
Fred Drakeba096332000-07-09 07:04:36 +00003060formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003061{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003062 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003063 if (PyString_Check(v)) {
3064 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003065 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003066 }
3067 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003068 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003069 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003070 }
3071 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003072 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003073}
3074
Guido van Rossum013142a1994-08-30 08:19:36 +00003075
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (for example after `sizeof`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003085
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003087PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003088{
3089 char *fmt, *res;
3090 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003091 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003092 PyObject *result, *orig_args;
3093#ifdef Py_USING_UNICODE
3094 PyObject *v, *w;
3095#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003096 PyObject *dict = NULL;
3097 if (format == NULL || !PyString_Check(format) || args == NULL) {
3098 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003099 return NULL;
3100 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003101 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003102 fmt = PyString_AsString(format);
3103 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003104 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003105 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003106 if (result == NULL)
3107 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003108 res = PyString_AsString(result);
3109 if (PyTuple_Check(args)) {
3110 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003111 argidx = 0;
3112 }
3113 else {
3114 arglen = -1;
3115 argidx = -2;
3116 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003117 if (args->ob_type->tp_as_mapping)
3118 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003119 while (--fmtcnt >= 0) {
3120 if (*fmt != '%') {
3121 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003122 rescnt = fmtcnt + 100;
3123 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003124 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003125 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003126 res = PyString_AsString(result)
3127 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003128 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003129 }
3130 *res++ = *fmt++;
3131 }
3132 else {
3133 /* Got a format specifier */
3134 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003135 int width = -1;
3136 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003137 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003138 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003139 PyObject *v = NULL;
3140 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003141 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003142 int sign;
3143 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003144 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003145#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003146 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003147 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003148#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003149
Guido van Rossumda9c2711996-12-05 21:58:58 +00003150 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003151 if (*fmt == '(') {
3152 char *keystart;
3153 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003154 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003155 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003156
3157 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003158 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003159 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003160 goto error;
3161 }
3162 ++fmt;
3163 --fmtcnt;
3164 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003165 /* Skip over balanced parentheses */
3166 while (pcount > 0 && --fmtcnt >= 0) {
3167 if (*fmt == ')')
3168 --pcount;
3169 else if (*fmt == '(')
3170 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003171 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003172 }
3173 keylen = fmt - keystart - 1;
3174 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003175 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003176 "incomplete format key");
3177 goto error;
3178 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003179 key = PyString_FromStringAndSize(keystart,
3180 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003181 if (key == NULL)
3182 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003183 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003184 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003185 args_owned = 0;
3186 }
3187 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003188 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003189 if (args == NULL) {
3190 goto error;
3191 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003192 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003193 arglen = -1;
3194 argidx = -2;
3195 }
Guido van Rossume5372401993-03-16 12:15:04 +00003196 while (--fmtcnt >= 0) {
3197 switch (c = *fmt++) {
3198 case '-': flags |= F_LJUST; continue;
3199 case '+': flags |= F_SIGN; continue;
3200 case ' ': flags |= F_BLANK; continue;
3201 case '#': flags |= F_ALT; continue;
3202 case '0': flags |= F_ZERO; continue;
3203 }
3204 break;
3205 }
3206 if (c == '*') {
3207 v = getnextarg(args, arglen, &argidx);
3208 if (v == NULL)
3209 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003210 if (!PyInt_Check(v)) {
3211 PyErr_SetString(PyExc_TypeError,
3212 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003213 goto error;
3214 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003215 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003216 if (width < 0) {
3217 flags |= F_LJUST;
3218 width = -width;
3219 }
Guido van Rossume5372401993-03-16 12:15:04 +00003220 if (--fmtcnt >= 0)
3221 c = *fmt++;
3222 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003223 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003224 width = c - '0';
3225 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003226 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003227 if (!isdigit(c))
3228 break;
3229 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003230 PyErr_SetString(
3231 PyExc_ValueError,
3232 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003233 goto error;
3234 }
3235 width = width*10 + (c - '0');
3236 }
3237 }
3238 if (c == '.') {
3239 prec = 0;
3240 if (--fmtcnt >= 0)
3241 c = *fmt++;
3242 if (c == '*') {
3243 v = getnextarg(args, arglen, &argidx);
3244 if (v == NULL)
3245 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003246 if (!PyInt_Check(v)) {
3247 PyErr_SetString(
3248 PyExc_TypeError,
3249 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003250 goto error;
3251 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003252 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003253 if (prec < 0)
3254 prec = 0;
3255 if (--fmtcnt >= 0)
3256 c = *fmt++;
3257 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003258 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003259 prec = c - '0';
3260 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003261 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003262 if (!isdigit(c))
3263 break;
3264 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003265 PyErr_SetString(
3266 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003267 "prec too big");
3268 goto error;
3269 }
3270 prec = prec*10 + (c - '0');
3271 }
3272 }
3273 } /* prec */
3274 if (fmtcnt >= 0) {
3275 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003276 if (--fmtcnt >= 0)
3277 c = *fmt++;
3278 }
3279 }
3280 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003281 PyErr_SetString(PyExc_ValueError,
3282 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003283 goto error;
3284 }
3285 if (c != '%') {
3286 v = getnextarg(args, arglen, &argidx);
3287 if (v == NULL)
3288 goto error;
3289 }
3290 sign = 0;
3291 fill = ' ';
3292 switch (c) {
3293 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003294 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003295 len = 1;
3296 break;
3297 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003298 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003299#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003300 if (PyUnicode_Check(v)) {
3301 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003302 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003303 goto unicode;
3304 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003305#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003306 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003307 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003308 else
3309 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003310 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003311 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003312 if (!PyString_Check(temp)) {
3313 PyErr_SetString(PyExc_TypeError,
3314 "%s argument has non-string str()");
3315 goto error;
3316 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003317 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003318 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003319 if (prec >= 0 && len > prec)
3320 len = prec;
3321 break;
3322 case 'i':
3323 case 'd':
3324 case 'u':
3325 case 'o':
3326 case 'x':
3327 case 'X':
3328 if (c == 'i')
3329 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003330 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003331 temp = _PyString_FormatLong(v, flags,
3332 prec, c, &pbuf, &len);
3333 if (!temp)
3334 goto error;
3335 /* unbounded ints can always produce
3336 a sign character! */
3337 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003338 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003339 else {
3340 pbuf = formatbuf;
3341 len = formatint(pbuf, sizeof(formatbuf),
3342 flags, prec, c, v);
3343 if (len < 0)
3344 goto error;
3345 /* only d conversion is signed */
3346 sign = c == 'd';
3347 }
3348 if (flags & F_ZERO)
3349 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003350 break;
3351 case 'e':
3352 case 'E':
3353 case 'f':
3354 case 'g':
3355 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003356 pbuf = formatbuf;
3357 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003358 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003359 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003360 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003361 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003362 fill = '0';
3363 break;
3364 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003365 pbuf = formatbuf;
3366 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003367 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003368 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003369 break;
3370 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003371 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003372 "unsupported format character '%c' (0x%x) "
3373 "at index %i",
3374 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003375 goto error;
3376 }
3377 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003378 if (*pbuf == '-' || *pbuf == '+') {
3379 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003380 len--;
3381 }
3382 else if (flags & F_SIGN)
3383 sign = '+';
3384 else if (flags & F_BLANK)
3385 sign = ' ';
3386 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003387 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003388 }
3389 if (width < len)
3390 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003391 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003392 reslen -= rescnt;
3393 rescnt = width + fmtcnt + 100;
3394 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003395 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003396 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003397 res = PyString_AsString(result)
3398 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003399 }
3400 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003401 if (fill != ' ')
3402 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003403 rescnt--;
3404 if (width > len)
3405 width--;
3406 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003407 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3408 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003409 assert(pbuf[1] == c);
3410 if (fill != ' ') {
3411 *res++ = *pbuf++;
3412 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003413 }
Tim Petersfff53252001-04-12 18:38:48 +00003414 rescnt -= 2;
3415 width -= 2;
3416 if (width < 0)
3417 width = 0;
3418 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003419 }
3420 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003421 do {
3422 --rescnt;
3423 *res++ = fill;
3424 } while (--width > len);
3425 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003426 if (fill == ' ') {
3427 if (sign)
3428 *res++ = sign;
3429 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003430 (c == 'x' || c == 'X')) {
3431 assert(pbuf[0] == '0');
3432 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003433 *res++ = *pbuf++;
3434 *res++ = *pbuf++;
3435 }
3436 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003437 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003438 res += len;
3439 rescnt -= len;
3440 while (--width >= len) {
3441 --rescnt;
3442 *res++ = ' ';
3443 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003444 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003446 "not all arguments converted");
3447 goto error;
3448 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003449 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003450 } /* '%' */
3451 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003452 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003453 PyErr_SetString(PyExc_TypeError,
3454 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003455 goto error;
3456 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003457 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003458 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003459 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003460 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003461 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003462
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003463#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003464 unicode:
3465 if (args_owned) {
3466 Py_DECREF(args);
3467 args_owned = 0;
3468 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003469 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003470 if (PyTuple_Check(orig_args) && argidx > 0) {
3471 PyObject *v;
3472 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3473 v = PyTuple_New(n);
3474 if (v == NULL)
3475 goto error;
3476 while (--n >= 0) {
3477 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3478 Py_INCREF(w);
3479 PyTuple_SET_ITEM(v, n, w);
3480 }
3481 args = v;
3482 } else {
3483 Py_INCREF(orig_args);
3484 args = orig_args;
3485 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003486 args_owned = 1;
3487 /* Take what we have of the result and let the Unicode formatting
3488 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003489 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003490 if (_PyString_Resize(&result, rescnt))
3491 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003492 fmtcnt = PyString_GET_SIZE(format) - \
3493 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003494 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3495 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003496 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003497 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003498 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003499 if (v == NULL)
3500 goto error;
3501 /* Paste what we have (result) to what the Unicode formatting
3502 function returned (v) and return the result (or error) */
3503 w = PyUnicode_Concat(result, v);
3504 Py_DECREF(result);
3505 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003506 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003507 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003508#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003509
Guido van Rossume5372401993-03-16 12:15:04 +00003510 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003511 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003512 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003513 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003514 }
Guido van Rossume5372401993-03-16 12:15:04 +00003515 return NULL;
3516}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003517
3518
3519#ifdef INTERN_STRINGS
3520
Barry Warsaw4df762f2000-08-16 23:41:01 +00003521/* This dictionary will leak at PyString_Fini() time. That's acceptable
3522 * because PyString_Fini() specifically frees interned strings that are
3523 * only referenced by this dictionary. The CVS log entry for revision 2.45
3524 * says:
3525 *
3526 * Change the Fini function to only remove otherwise unreferenced
3527 * strings from the interned table. There are references in
3528 * hard-to-find static variables all over the interpreter, and it's not
3529 * worth trying to get rid of all those; but "uninterning" isn't fair
3530 * either and may cause subtle failures later -- so we have to keep them
3531 * in the interned table.
3532 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003533static PyObject *interned;
3534
3535void
Fred Drakeba096332000-07-09 07:04:36 +00003536PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003537{
3538 register PyStringObject *s = (PyStringObject *)(*p);
3539 PyObject *t;
3540 if (s == NULL || !PyString_Check(s))
3541 Py_FatalError("PyString_InternInPlace: strings only please!");
3542 if ((t = s->ob_sinterned) != NULL) {
3543 if (t == (PyObject *)s)
3544 return;
3545 Py_INCREF(t);
3546 *p = t;
3547 Py_DECREF(s);
3548 return;
3549 }
3550 if (interned == NULL) {
3551 interned = PyDict_New();
3552 if (interned == NULL)
3553 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003554 }
3555 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3556 Py_INCREF(t);
3557 *p = s->ob_sinterned = t;
3558 Py_DECREF(s);
3559 return;
3560 }
Tim Peters111f6092001-09-12 07:54:51 +00003561 /* Ensure that only true string objects appear in the intern dict,
3562 and as the value of ob_sinterned. */
3563 if (PyString_CheckExact(s)) {
3564 t = (PyObject *)s;
3565 if (PyDict_SetItem(interned, t, t) == 0) {
3566 s->ob_sinterned = t;
3567 return;
3568 }
3569 }
3570 else {
3571 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3572 PyString_GET_SIZE(s));
3573 if (t != NULL) {
3574 if (PyDict_SetItem(interned, t, t) == 0) {
3575 *p = s->ob_sinterned = t;
3576 Py_DECREF(s);
3577 return;
3578 }
3579 Py_DECREF(t);
3580 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003581 }
3582 PyErr_Clear();
3583}
3584
3585
3586PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003587PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003588{
3589 PyObject *s = PyString_FromString(cp);
3590 if (s == NULL)
3591 return NULL;
3592 PyString_InternInPlace(&s);
3593 return s;
3594}
3595
3596#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003597
3598void
Fred Drakeba096332000-07-09 07:04:36 +00003599PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003600{
3601 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003602 for (i = 0; i < UCHAR_MAX + 1; i++) {
3603 Py_XDECREF(characters[i]);
3604 characters[i] = NULL;
3605 }
3606#ifndef DONT_SHARE_SHORT_STRINGS
3607 Py_XDECREF(nullstring);
3608 nullstring = NULL;
3609#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003610#ifdef INTERN_STRINGS
3611 if (interned) {
3612 int pos, changed;
3613 PyObject *key, *value;
3614 do {
3615 changed = 0;
3616 pos = 0;
3617 while (PyDict_Next(interned, &pos, &key, &value)) {
3618 if (key->ob_refcnt == 2 && key == value) {
3619 PyDict_DelItem(interned, key);
3620 changed = 1;
3621 }
3622 }
3623 } while (changed);
3624 }
3625#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003626}
Barry Warsawa903ad982001-02-23 16:40:48 +00003627
3628#ifdef INTERN_STRINGS
3629void _Py_ReleaseInternedStrings(void)
3630{
3631 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003632 fprintf(stderr, "releasing interned strings\n");
3633 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003634 Py_DECREF(interned);
3635 interned = NULL;
3636 }
3637}
3638#endif /* INTERN_STRINGS */