blob: 2e0d6d65ae22ab1a0ab922f6244c1c2b4106bed2 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* Cache of shared one-character string objects, indexed by byte value.
   Entries start out NULL and are filled in by PyString_FromStringAndSize()
   and PyString_FromString() the first time each character is created. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
/* The shared empty string object; NULL until the first empty string is
   created by one of the constructors below. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
Martin v. Löwisd1327502001-12-02 18:09:41 +000022 PyString_FromStringAndSize() and PyString_FromString() try in certain cases
23 to share string objects. When the size of the string is zero, these
24 routines always return a pointer to the same string object; when the size
25 is one, they return a pointer to an already existing object if the contents
26 of the string is known. For PyString_FromString() this is always the case,
27 for PyString_FromStringAndSize() this is the case when the first argument
28 in not NULL.
29
30 A common practice of allocating a string and then filling it in or changing
31 it must be done carefully. It is only allowed to change the contents of
32 the string if the object was gotten from PyString_FromStringAndSize() with
33 a NULL first argument, because in the future these routines may try to do
34 even more sharing of objects.
35
36 The parameter `size' denotes number of characters to allocate, not counting
37 the null terminating character. If the `str' argument is not NULL, then it
38 must point to a null-terminated string of length `size'.
39
40 The member `op->ob_size' denotes the number of bytes of data in the string,
41 not counting the null terminating character, and is therefore equal to the
42 `size' parameter.
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000048#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000049 if (size == 0 && (op = nullstring) != NULL) {
50#ifdef COUNT_ALLOCS
51 null_strings++;
52#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053 Py_INCREF(op);
54 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000056 if (size == 1 && str != NULL &&
57 (op = characters[*str & UCHAR_MAX]) != NULL)
58 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059#ifdef COUNT_ALLOCS
60 one_strings++;
61#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 Py_INCREF(op);
63 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000065#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000066
67 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000069 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000070 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000072 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073#ifdef CACHE_HASH
74 op->ob_shash = -1;
75#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000076#ifdef INTERN_STRINGS
77 op->ob_sinterned = NULL;
78#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (str != NULL)
80 memcpy(op->ob_sval, str, size);
81 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000090 PyObject *t = (PyObject *)op;
91 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000092 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000103 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000105 if (size > INT_MAX) {
106 PyErr_SetString(PyExc_OverflowError,
107 "string is too long for a Python string");
108 return NULL;
109 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000110#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000111 if (size == 0 && (op = nullstring) != NULL) {
112#ifdef COUNT_ALLOCS
113 null_strings++;
114#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000115 Py_INCREF(op);
116 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 }
118 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
119#ifdef COUNT_ALLOCS
120 one_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000125#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
127 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133#ifdef CACHE_HASH
134 op->ob_shash = -1;
135#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000136#ifdef INTERN_STRINGS
137 op->ob_sinterned = NULL;
138#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000140#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000142 PyObject *t = (PyObject *)op;
143 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000144 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000148 PyObject *t = (PyObject *)op;
149 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000150 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000153 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000154#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
170 count = vargs;
171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
179 /* skip the 'l' in %ld, since it doesn't change the
180 width. although only %d is supported (see
181 "expand" section below), others can be easily
182 add */
183 if (*f == 'l' && *(f+1) == 'd')
184 ++f;
185
186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'i': case 'x':
194 (void) va_arg(count, int);
195 /* 20 bytes should be enough to hold a 64-bit
196 integer */
197 n += 20;
198 break;
199 case 's':
200 s = va_arg(count, char*);
201 n += strlen(s);
202 break;
203 case 'p':
204 (void) va_arg(count, int);
205 /* maximum 64-bit pointer representation:
206 * 0xffffffffffffffff
207 * so 19 characters is enough.
208 */
209 n += 19;
210 break;
211 default:
212 /* if we stumble upon an unknown
213 formatting code, copy the rest of
214 the format string to the output
215 string. (we cannot just skip the
216 code, since there's no way to know
217 what's in the argument list) */
218 n += strlen(p);
219 goto expand;
220 }
221 } else
222 n++;
223 }
224 expand:
225 /* step 2: fill the buffer */
226 string = PyString_FromStringAndSize(NULL, n);
227 if (!string)
228 return NULL;
229
230 s = PyString_AsString(string);
231
232 for (f = format; *f; f++) {
233 if (*f == '%') {
234 const char* p = f++;
235 int i, longflag = 0;
236 /* parse the width.precision part (we're only
237 interested in the precision value, if any) */
238 n = 0;
239 while (isdigit(Py_CHARMASK(*f)))
240 n = (n*10) + *f++ - '0';
241 if (*f == '.') {
242 f++;
243 n = 0;
244 while (isdigit(Py_CHARMASK(*f)))
245 n = (n*10) + *f++ - '0';
246 }
247 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
248 f++;
249 /* handle the long flag, but only for %ld. others
250 can be added when necessary. */
251 if (*f == 'l' && *(f+1) == 'd') {
252 longflag = 1;
253 ++f;
254 }
255
256 switch (*f) {
257 case 'c':
258 *s++ = va_arg(vargs, int);
259 break;
260 case 'd':
261 if (longflag)
262 sprintf(s, "%ld", va_arg(vargs, long));
263 else
264 sprintf(s, "%d", va_arg(vargs, int));
265 s += strlen(s);
266 break;
267 case 'i':
268 sprintf(s, "%i", va_arg(vargs, int));
269 s += strlen(s);
270 break;
271 case 'x':
272 sprintf(s, "%x", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 's':
276 p = va_arg(vargs, char*);
277 i = strlen(p);
278 if (n > 0 && i > n)
279 i = n;
280 memcpy(s, p, i);
281 s += i;
282 break;
283 case 'p':
284 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000285 /* %p is ill-defined: ensure leading 0x. */
286 if (s[1] == 'X')
287 s[1] = 'x';
288 else if (s[1] != 'x') {
289 memmove(s+2, s, strlen(s)+1);
290 s[0] = '0';
291 s[1] = 'x';
292 }
Barry Warsawdadace02001-08-24 18:32:06 +0000293 s += strlen(s);
294 break;
295 case '%':
296 *s++ = '%';
297 break;
298 default:
299 strcpy(s, p);
300 s += strlen(s);
301 goto end;
302 }
303 } else
304 *s++ = *f;
305 }
306
307 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000309 return string;
310}
311
312PyObject *
313PyString_FromFormat(const char *format, ...)
314{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000315 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000316 va_list vargs;
317
318#ifdef HAVE_STDARG_PROTOTYPES
319 va_start(vargs, format);
320#else
321 va_start(vargs);
322#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 ret = PyString_FromFormatV(format, vargs);
324 va_end(vargs);
325 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000326}
327
328
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000329PyObject *PyString_Decode(const char *s,
330 int size,
331 const char *encoding,
332 const char *errors)
333{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000334 PyObject *v, *str;
335
336 str = PyString_FromStringAndSize(s, size);
337 if (str == NULL)
338 return NULL;
339 v = PyString_AsDecodedString(str, encoding, errors);
340 Py_DECREF(str);
341 return v;
342}
343
344PyObject *PyString_AsDecodedObject(PyObject *str,
345 const char *encoding,
346 const char *errors)
347{
348 PyObject *v;
349
350 if (!PyString_Check(str)) {
351 PyErr_BadArgument();
352 goto onError;
353 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000354
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000355 if (encoding == NULL) {
356#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000358#else
359 PyErr_SetString(PyExc_ValueError, "no encoding specified");
360 goto onError;
361#endif
362 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000363
364 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 v = PyCodec_Decode(str, encoding, errors);
366 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000368
369 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000370
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372 return NULL;
373}
374
375PyObject *PyString_AsDecodedString(PyObject *str,
376 const char *encoding,
377 const char *errors)
378{
379 PyObject *v;
380
381 v = PyString_AsDecodedObject(str, encoding, errors);
382 if (v == NULL)
383 goto onError;
384
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000385#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 /* Convert Unicode to a string using the default encoding */
387 if (PyUnicode_Check(v)) {
388 PyObject *temp = v;
389 v = PyUnicode_AsEncodedString(v, NULL, NULL);
390 Py_DECREF(temp);
391 if (v == NULL)
392 goto onError;
393 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000394#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000395 if (!PyString_Check(v)) {
396 PyErr_Format(PyExc_TypeError,
397 "decoder did not return a string object (type=%.400s)",
398 v->ob_type->tp_name);
399 Py_DECREF(v);
400 goto onError;
401 }
402
403 return v;
404
405 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000406 return NULL;
407}
408
409PyObject *PyString_Encode(const char *s,
410 int size,
411 const char *encoding,
412 const char *errors)
413{
414 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000415
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000416 str = PyString_FromStringAndSize(s, size);
417 if (str == NULL)
418 return NULL;
419 v = PyString_AsEncodedString(str, encoding, errors);
420 Py_DECREF(str);
421 return v;
422}
423
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000424PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000425 const char *encoding,
426 const char *errors)
427{
428 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000429
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 if (!PyString_Check(str)) {
431 PyErr_BadArgument();
432 goto onError;
433 }
434
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000435 if (encoding == NULL) {
436#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000438#else
439 PyErr_SetString(PyExc_ValueError, "no encoding specified");
440 goto onError;
441#endif
442 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000443
444 /* Encode via the codec registry */
445 v = PyCodec_Encode(str, encoding, errors);
446 if (v == NULL)
447 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000448
449 return v;
450
451 onError:
452 return NULL;
453}
454
455PyObject *PyString_AsEncodedString(PyObject *str,
456 const char *encoding,
457 const char *errors)
458{
459 PyObject *v;
460
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000461 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000462 if (v == NULL)
463 goto onError;
464
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000465#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000466 /* Convert Unicode to a string using the default encoding */
467 if (PyUnicode_Check(v)) {
468 PyObject *temp = v;
469 v = PyUnicode_AsEncodedString(v, NULL, NULL);
470 Py_DECREF(temp);
471 if (v == NULL)
472 goto onError;
473 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000474#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000475 if (!PyString_Check(v)) {
476 PyErr_Format(PyExc_TypeError,
477 "encoder did not return a string object (type=%.400s)",
478 v->ob_type->tp_name);
479 Py_DECREF(v);
480 goto onError;
481 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000482
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000484
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000485 onError:
486 return NULL;
487}
488
Guido van Rossum234f9421993-06-17 12:35:49 +0000489static void
Fred Drakeba096332000-07-09 07:04:36 +0000490string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000491{
Guido van Rossum9475a232001-10-05 20:51:39 +0000492 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000493}
494
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000495static int
496string_getsize(register PyObject *op)
497{
498 char *s;
499 int len;
500 if (PyString_AsStringAndSize(op, &s, &len))
501 return -1;
502 return len;
503}
504
505static /*const*/ char *
506string_getbuffer(register PyObject *op)
507{
508 char *s;
509 int len;
510 if (PyString_AsStringAndSize(op, &s, &len))
511 return NULL;
512 return s;
513}
514
Guido van Rossumd7047b31995-01-02 19:07:15 +0000515int
Fred Drakeba096332000-07-09 07:04:36 +0000516PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518 if (!PyString_Check(op))
519 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000520 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521}
522
523/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000524PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000525{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000526 if (!PyString_Check(op))
527 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000528 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529}
530
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000531int
532PyString_AsStringAndSize(register PyObject *obj,
533 register char **s,
534 register int *len)
535{
536 if (s == NULL) {
537 PyErr_BadInternalCall();
538 return -1;
539 }
540
541 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000542#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543 if (PyUnicode_Check(obj)) {
544 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
545 if (obj == NULL)
546 return -1;
547 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000548 else
549#endif
550 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000551 PyErr_Format(PyExc_TypeError,
552 "expected string or Unicode object, "
553 "%.200s found", obj->ob_type->tp_name);
554 return -1;
555 }
556 }
557
558 *s = PyString_AS_STRING(obj);
559 if (len != NULL)
560 *len = PyString_GET_SIZE(obj);
561 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
562 PyErr_SetString(PyExc_TypeError,
563 "expected string without null bytes");
564 return -1;
565 }
566 return 0;
567}
568
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000569/* Methods */
570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571static int
Fred Drakeba096332000-07-09 07:04:36 +0000572string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573{
574 int i;
575 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000576 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000577
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000578 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000579 if (! PyString_CheckExact(op)) {
580 int ret;
581 /* A str subclass may have its own __str__ method. */
582 op = (PyStringObject *) PyObject_Str((PyObject *)op);
583 if (op == NULL)
584 return -1;
585 ret = string_print(op, fp, flags);
586 Py_DECREF(op);
587 return ret;
588 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000589 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000591 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000592 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000593
Thomas Wouters7e474022000-07-16 12:04:32 +0000594 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000595 quote = '\'';
596 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
597 quote = '"';
598
599 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000600 for (i = 0; i < op->ob_size; i++) {
601 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000602 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000603 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000604 else if (c == '\t')
605 fprintf(fp, "\\t");
606 else if (c == '\n')
607 fprintf(fp, "\\n");
608 else if (c == '\r')
609 fprintf(fp, "\\r");
610 else if (c < ' ' || c >= 0x7f)
611 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000612 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000613 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000614 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000615 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000616 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000617}
618
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000619static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000620string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000622 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
623 PyObject *v;
624 if (newsize > INT_MAX) {
625 PyErr_SetString(PyExc_OverflowError,
626 "string is too large to make repr");
627 }
628 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000630 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631 }
632 else {
633 register int i;
634 register char c;
635 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000636 int quote;
637
Thomas Wouters7e474022000-07-16 12:04:32 +0000638 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000639 quote = '\'';
640 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
641 quote = '"';
642
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000643 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000644 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000645 for (i = 0; i < op->ob_size; i++) {
646 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000647 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000649 else if (c == '\t')
650 *p++ = '\\', *p++ = 't';
651 else if (c == '\n')
652 *p++ = '\\', *p++ = 'n';
653 else if (c == '\r')
654 *p++ = '\\', *p++ = 'r';
655 else if (c < ' ' || c >= 0x7f) {
656 sprintf(p, "\\x%02x", c & 0xff);
657 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000658 }
659 else
660 *p++ = c;
661 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000662 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000664 _PyString_Resize(
665 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000666 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000668}
669
Guido van Rossum189f1df2001-05-01 16:51:53 +0000670static PyObject *
671string_str(PyObject *s)
672{
Tim Petersc9933152001-10-16 20:18:24 +0000673 assert(PyString_Check(s));
674 if (PyString_CheckExact(s)) {
675 Py_INCREF(s);
676 return s;
677 }
678 else {
679 /* Subtype -- return genuine string with the same value. */
680 PyStringObject *t = (PyStringObject *) s;
681 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
682 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000683}
684
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000685static int
Fred Drakeba096332000-07-09 07:04:36 +0000686string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687{
688 return a->ob_size;
689}
690
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000691static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000692string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693{
694 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000695 register PyStringObject *op;
696 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000697#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000698 if (PyUnicode_Check(bb))
699 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000700#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000701 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000702 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000703 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704 return NULL;
705 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000708 if ((a->ob_size == 0 || b->ob_size == 0) &&
709 PyString_CheckExact(a) && PyString_CheckExact(b)) {
710 if (a->ob_size == 0) {
711 Py_INCREF(bb);
712 return bb;
713 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000714 Py_INCREF(a);
715 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716 }
717 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000718 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000719 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000720 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000721 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000723 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000724#ifdef CACHE_HASH
725 op->ob_shash = -1;
726#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000727#ifdef INTERN_STRINGS
728 op->ob_sinterned = NULL;
729#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000730 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
731 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
732 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000734#undef b
735}
736
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000738string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000739{
740 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000741 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000742 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000743 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000744 if (n < 0)
745 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000746 /* watch out for overflows: the size can overflow int,
747 * and the # of bytes needed can overflow size_t
748 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000750 if (n && size / n != a->ob_size) {
751 PyErr_SetString(PyExc_OverflowError,
752 "repeated string is too long");
753 return NULL;
754 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000755 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000756 Py_INCREF(a);
757 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000758 }
Tim Peters8f422462000-09-09 06:13:41 +0000759 nbytes = size * sizeof(char);
760 if (nbytes / sizeof(char) != (size_t)size ||
761 nbytes + sizeof(PyStringObject) <= nbytes) {
762 PyErr_SetString(PyExc_OverflowError,
763 "repeated string is too long");
764 return NULL;
765 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000766 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000767 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000768 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000769 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000770 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000771#ifdef CACHE_HASH
772 op->ob_shash = -1;
773#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000774#ifdef INTERN_STRINGS
775 op->ob_sinterned = NULL;
776#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000777 for (i = 0; i < size; i += a->ob_size)
778 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
779 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000780 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000781}
782
783/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
784
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000785static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000786string_slice(register PyStringObject *a, register int i, register int j)
787 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788{
789 if (i < 0)
790 i = 0;
791 if (j < 0)
792 j = 0; /* Avoid signed/unsigned bug in next line */
793 if (j > a->ob_size)
794 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000795 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
796 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000797 Py_INCREF(a);
798 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799 }
800 if (j < i)
801 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000802 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000803}
804
Guido van Rossum9284a572000-03-07 15:53:43 +0000805static int
Fred Drakeba096332000-07-09 07:04:36 +0000806string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000807{
808 register char *s, *end;
809 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000810#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000811 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000812 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000813#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000814 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000815 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000816 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000817 return -1;
818 }
819 c = PyString_AsString(el)[0];
820 s = PyString_AsString(a);
821 end = s + PyString_Size(a);
822 while (s < end) {
823 if (c == *s++)
824 return 1;
825 }
826 return 0;
827}
828
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000829static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000830string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000832 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000833 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000835 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 return NULL;
837 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000838 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000839 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000840 if (v == NULL)
841 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000842 else {
843#ifdef COUNT_ALLOCS
844 one_strings++;
845#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000846 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000847 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000848 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849}
850
Martin v. Löwiscd353062001-05-24 16:56:35 +0000851static PyObject*
852string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 int c;
855 int len_a, len_b;
856 int min_len;
857 PyObject *result;
858
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000859 /* Make sure both arguments are strings. */
860 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000861 result = Py_NotImplemented;
862 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000863 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000864 if (a == b) {
865 switch (op) {
866 case Py_EQ:case Py_LE:case Py_GE:
867 result = Py_True;
868 goto out;
869 case Py_NE:case Py_LT:case Py_GT:
870 result = Py_False;
871 goto out;
872 }
873 }
874 if (op == Py_EQ) {
875 /* Supporting Py_NE here as well does not save
876 much time, since Py_NE is rarely used. */
877 if (a->ob_size == b->ob_size
878 && (a->ob_sval[0] == b->ob_sval[0]
879 && memcmp(a->ob_sval, b->ob_sval,
880 a->ob_size) == 0)) {
881 result = Py_True;
882 } else {
883 result = Py_False;
884 }
885 goto out;
886 }
887 len_a = a->ob_size; len_b = b->ob_size;
888 min_len = (len_a < len_b) ? len_a : len_b;
889 if (min_len > 0) {
890 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
891 if (c==0)
892 c = memcmp(a->ob_sval, b->ob_sval, min_len);
893 }else
894 c = 0;
895 if (c == 0)
896 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
897 switch (op) {
898 case Py_LT: c = c < 0; break;
899 case Py_LE: c = c <= 0; break;
900 case Py_EQ: assert(0); break; /* unreachable */
901 case Py_NE: c = c != 0; break;
902 case Py_GT: c = c > 0; break;
903 case Py_GE: c = c >= 0; break;
904 default:
905 result = Py_NotImplemented;
906 goto out;
907 }
908 result = c ? Py_True : Py_False;
909 out:
910 Py_INCREF(result);
911 return result;
912}
913
914int
915_PyString_Eq(PyObject *o1, PyObject *o2)
916{
917 PyStringObject *a, *b;
918 a = (PyStringObject*)o1;
919 b = (PyStringObject*)o2;
920 return a->ob_size == b->ob_size
921 && *a->ob_sval == *b->ob_sval
922 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923}
924
Guido van Rossum9bfef441993-03-29 10:43:31 +0000925static long
Fred Drakeba096332000-07-09 07:04:36 +0000926string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000927{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000928 register int len;
929 register unsigned char *p;
930 register long x;
931
932#ifdef CACHE_HASH
933 if (a->ob_shash != -1)
934 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000935#ifdef INTERN_STRINGS
936 if (a->ob_sinterned != NULL)
937 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000939#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000940#endif
941 len = a->ob_size;
942 p = (unsigned char *) a->ob_sval;
943 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000944 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000945 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000946 x ^= a->ob_size;
947 if (x == -1)
948 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000949#ifdef CACHE_HASH
950 a->ob_shash = x;
951#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000952 return x;
953}
954
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000955static int
Fred Drakeba096332000-07-09 07:04:36 +0000956string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000957{
958 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000959 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000960 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000961 return -1;
962 }
963 *ptr = (void *)self->ob_sval;
964 return self->ob_size;
965}
966
967static int
Fred Drakeba096332000-07-09 07:04:36 +0000968string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000969{
Guido van Rossum045e6881997-09-08 18:30:11 +0000970 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000971 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000972 return -1;
973}
974
975static int
Fred Drakeba096332000-07-09 07:04:36 +0000976string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000977{
978 if ( lenp )
979 *lenp = self->ob_size;
980 return 1;
981}
982
Guido van Rossum1db70701998-10-08 02:18:52 +0000983static int
Fred Drakeba096332000-07-09 07:04:36 +0000984string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000985{
986 if ( index != 0 ) {
987 PyErr_SetString(PyExc_SystemError,
988 "accessing non-existent string segment");
989 return -1;
990 }
991 *ptr = self->ob_sval;
992 return self->ob_size;
993}
994
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000996 (inquiry)string_length, /*sq_length*/
997 (binaryfunc)string_concat, /*sq_concat*/
998 (intargfunc)string_repeat, /*sq_repeat*/
999 (intargfunc)string_item, /*sq_item*/
1000 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001001 0, /*sq_ass_item*/
1002 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001003 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001004};
1005
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001006static PyBufferProcs string_as_buffer = {
1007 (getreadbufferproc)string_buffer_getreadbuf,
1008 (getwritebufferproc)string_buffer_getwritebuf,
1009 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001010 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001011};
1012
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001013
1014
/* Values for the striptype argument of do_strip(). */
#define LEFTSTRIP   0   /* remove leading whitespace only */
#define RIGHTSTRIP  1   /* remove trailing whitespace only */
#define BOTHSTRIP   2   /* remove whitespace at both ends */
1018
1019
1020static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001021split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001022{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001023 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001024 PyObject* item;
1025 PyObject *list = PyList_New(0);
1026
1027 if (list == NULL)
1028 return NULL;
1029
Guido van Rossum4c08d552000-03-10 22:55:18 +00001030 for (i = j = 0; i < len; ) {
1031 while (i < len && isspace(Py_CHARMASK(s[i])))
1032 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001034 while (i < len && !isspace(Py_CHARMASK(s[i])))
1035 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001036 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001037 if (maxsplit-- <= 0)
1038 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001039 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1040 if (item == NULL)
1041 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001042 err = PyList_Append(list, item);
1043 Py_DECREF(item);
1044 if (err < 0)
1045 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001046 while (i < len && isspace(Py_CHARMASK(s[i])))
1047 i++;
1048 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049 }
1050 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001051 if (j < len) {
1052 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1053 if (item == NULL)
1054 goto finally;
1055 err = PyList_Append(list, item);
1056 Py_DECREF(item);
1057 if (err < 0)
1058 goto finally;
1059 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001060 return list;
1061 finally:
1062 Py_DECREF(list);
1063 return NULL;
1064}
1065
1066
/* Docstring for the split() method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001074
1075static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001076string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077{
1078 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001079 int maxsplit = -1;
1080 const char *s = PyString_AS_STRING(self), *sub;
1081 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001082
Guido van Rossum4c08d552000-03-10 22:55:18 +00001083 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001084 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001085 if (maxsplit < 0)
1086 maxsplit = INT_MAX;
1087 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089 if (PyString_Check(subobj)) {
1090 sub = PyString_AS_STRING(subobj);
1091 n = PyString_GET_SIZE(subobj);
1092 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001093#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001094 else if (PyUnicode_Check(subobj))
1095 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001096#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001097 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1098 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001099 if (n == 0) {
1100 PyErr_SetString(PyExc_ValueError, "empty separator");
1101 return NULL;
1102 }
1103
1104 list = PyList_New(0);
1105 if (list == NULL)
1106 return NULL;
1107
1108 i = j = 0;
1109 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001110 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001111 if (maxsplit-- <= 0)
1112 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1114 if (item == NULL)
1115 goto fail;
1116 err = PyList_Append(list, item);
1117 Py_DECREF(item);
1118 if (err < 0)
1119 goto fail;
1120 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001121 }
1122 else
1123 i++;
1124 }
1125 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1126 if (item == NULL)
1127 goto fail;
1128 err = PyList_Append(list, item);
1129 Py_DECREF(item);
1130 if (err < 0)
1131 goto fail;
1132
1133 return list;
1134
1135 fail:
1136 Py_DECREF(list);
1137 return NULL;
1138}
1139
1140
/* Docstring for the join() method. */
static char join__doc__[] =
    "S.join(sequence) -> string\n"
    "\n"
    "Return a string which is the concatenation of the strings in the\n"
    "sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146
1147static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001148string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001149{
1150 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001151 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001153 char *p;
1154 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001155 size_t sz = 0;
1156 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001157 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001158
Tim Peters19fe14e2001-01-19 03:03:47 +00001159 seq = PySequence_Fast(orig, "");
1160 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001161 if (PyErr_ExceptionMatches(PyExc_TypeError))
1162 PyErr_Format(PyExc_TypeError,
1163 "sequence expected, %.80s found",
1164 orig->ob_type->tp_name);
1165 return NULL;
1166 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001167
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001168 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001169 if (seqlen == 0) {
1170 Py_DECREF(seq);
1171 return PyString_FromString("");
1172 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001173 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001174 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001175 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1176 PyErr_Format(PyExc_TypeError,
1177 "sequence item 0: expected string,"
1178 " %.80s found",
1179 item->ob_type->tp_name);
1180 Py_DECREF(seq);
1181 return NULL;
1182 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001183 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001184 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001185 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001186 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001187
Tim Peters19fe14e2001-01-19 03:03:47 +00001188 /* There are at least two things to join. Do a pre-pass to figure out
1189 * the total amount of space we'll need (sz), see whether any argument
1190 * is absurd, and defer to the Unicode join if appropriate.
1191 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001192 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001193 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001194 item = PySequence_Fast_GET_ITEM(seq, i);
1195 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001196#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001197 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001198 /* Defer to Unicode join.
1199 * CAUTION: There's no gurantee that the
1200 * original sequence can be iterated over
1201 * again, so we must pass seq here.
1202 */
1203 PyObject *result;
1204 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001205 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001206 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001207 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001208#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001209 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001210 "sequence item %i: expected string,"
1211 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001212 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001213 Py_DECREF(seq);
1214 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001215 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001216 sz += PyString_GET_SIZE(item);
1217 if (i != 0)
1218 sz += seplen;
1219 if (sz < old_sz || sz > INT_MAX) {
1220 PyErr_SetString(PyExc_OverflowError,
1221 "join() is too long for a Python string");
1222 Py_DECREF(seq);
1223 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001224 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001225 }
1226
1227 /* Allocate result space. */
1228 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1229 if (res == NULL) {
1230 Py_DECREF(seq);
1231 return NULL;
1232 }
1233
1234 /* Catenate everything. */
1235 p = PyString_AS_STRING(res);
1236 for (i = 0; i < seqlen; ++i) {
1237 size_t n;
1238 item = PySequence_Fast_GET_ITEM(seq, i);
1239 n = PyString_GET_SIZE(item);
1240 memcpy(p, PyString_AS_STRING(item), n);
1241 p += n;
1242 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001243 memcpy(p, sep, seplen);
1244 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001245 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001246 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001247
Jeremy Hylton49048292000-07-11 03:28:17 +00001248 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001249 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001250}
1251
Tim Peters52e155e2001-06-16 05:42:57 +00001252PyObject *
1253_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001254{
Tim Petersa7259592001-06-16 05:11:17 +00001255 assert(sep != NULL && PyString_Check(sep));
1256 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001257 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001258}
1259
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260static long
Fred Drakeba096332000-07-09 07:04:36 +00001261string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001262{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001264 int len = PyString_GET_SIZE(self);
1265 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001266 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001267
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001268 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001269 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001270 return -2;
1271 if (PyString_Check(subobj)) {
1272 sub = PyString_AS_STRING(subobj);
1273 n = PyString_GET_SIZE(subobj);
1274 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001275#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 else if (PyUnicode_Check(subobj))
1277 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001278#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001279 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280 return -2;
1281
1282 if (last > len)
1283 last = len;
1284 if (last < 0)
1285 last += len;
1286 if (last < 0)
1287 last = 0;
1288 if (i < 0)
1289 i += len;
1290 if (i < 0)
1291 i = 0;
1292
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 if (dir > 0) {
1294 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 last -= n;
1297 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001298 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001299 return (long)i;
1300 }
1301 else {
1302 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001303
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 if (n == 0 && i <= last)
1305 return (long)last;
1306 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001307 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001308 return (long)j;
1309 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001310
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311 return -1;
1312}
1313
1314
/* Docstring for the find() method.  The range is written in slice
   notation, S[start:end], matching the wording used by count(). */
static char find__doc__[] =
    "S.find(sub [,start [,end]]) -> int\n"
    "\n"
    "Return the lowest index in S where substring sub is found,\n"
    "such that sub is contained within S[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
1323
1324static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001325string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001326{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001327 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328 if (result == -2)
1329 return NULL;
1330 return PyInt_FromLong(result);
1331}
1332
1333
/* Docstring for the index() method. */
static char index__doc__[] =
    "S.index(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.find() but raise ValueError when the substring is not found.";
1338
1339static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001340string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001342 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343 if (result == -2)
1344 return NULL;
1345 if (result == -1) {
1346 PyErr_SetString(PyExc_ValueError,
1347 "substring not found in string.index");
1348 return NULL;
1349 }
1350 return PyInt_FromLong(result);
1351}
1352
1353
/* Docstring for the rfind() method.  The range is written in slice
   notation, S[start:end], matching the wording used by count(). */
static char rfind__doc__[] =
    "S.rfind(sub [,start [,end]]) -> int\n"
    "\n"
    "Return the highest index in S where substring sub is found,\n"
    "such that sub is contained within S[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
1362
1363static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001364string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001366 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 if (result == -2)
1368 return NULL;
1369 return PyInt_FromLong(result);
1370}
1371
1372
/* Docstring for the rindex() method. */
static char rindex__doc__[] =
    "S.rindex(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.rfind() but raise ValueError when the substring is not found.";
1377
1378static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001379string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001381 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382 if (result == -2)
1383 return NULL;
1384 if (result == -1) {
1385 PyErr_SetString(PyExc_ValueError,
1386 "substring not found in string.rindex");
1387 return NULL;
1388 }
1389 return PyInt_FromLong(result);
1390}
1391
1392
1393static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001394do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001395{
1396 char *s = PyString_AS_STRING(self);
1397 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399 i = 0;
1400 if (striptype != RIGHTSTRIP) {
1401 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1402 i++;
1403 }
1404 }
1405
1406 j = len;
1407 if (striptype != LEFTSTRIP) {
1408 do {
1409 j--;
1410 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1411 j++;
1412 }
1413
Tim Peters8fa5dd02001-09-12 02:18:30 +00001414 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 Py_INCREF(self);
1416 return (PyObject*)self;
1417 }
1418 else
1419 return PyString_FromStringAndSize(s+i, j-i);
1420}
1421
1422
/* Docstring for the strip() method. */
static char strip__doc__[] =
    "S.strip() -> string\n"
    "\n"
    "Return a copy of the string S with leading and trailing\n"
    "whitespace removed.";
1428
1429static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001430string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001432 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433}
1434
1435
/* Docstring for the lstrip() method. */
static char lstrip__doc__[] =
    "S.lstrip() -> string\n"
    "\n"
    "Return a copy of the string S with leading whitespace removed.";
1440
1441static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001442string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001444 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445}
1446
1447
/* Docstring for the rstrip() method. */
static char rstrip__doc__[] =
    "S.rstrip() -> string\n"
    "\n"
    "Return a copy of the string S with trailing whitespace removed.";
1452
1453static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001454string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001456 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457}
1458
1459
/* Docstring for the lower() method. */
static char lower__doc__[] =
    "S.lower() -> string\n"
    "\n"
    "Return a copy of the string S converted to lowercase.";
1464
1465static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001466string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467{
1468 char *s = PyString_AS_STRING(self), *s_new;
1469 int i, n = PyString_GET_SIZE(self);
1470 PyObject *new;
1471
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472 new = PyString_FromStringAndSize(NULL, n);
1473 if (new == NULL)
1474 return NULL;
1475 s_new = PyString_AsString(new);
1476 for (i = 0; i < n; i++) {
1477 int c = Py_CHARMASK(*s++);
1478 if (isupper(c)) {
1479 *s_new = tolower(c);
1480 } else
1481 *s_new = c;
1482 s_new++;
1483 }
1484 return new;
1485}
1486
1487
/* Docstring for the upper() method. */
static char upper__doc__[] =
    "S.upper() -> string\n"
    "\n"
    "Return a copy of the string S converted to uppercase.";
1492
1493static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001494string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495{
1496 char *s = PyString_AS_STRING(self), *s_new;
1497 int i, n = PyString_GET_SIZE(self);
1498 PyObject *new;
1499
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500 new = PyString_FromStringAndSize(NULL, n);
1501 if (new == NULL)
1502 return NULL;
1503 s_new = PyString_AsString(new);
1504 for (i = 0; i < n; i++) {
1505 int c = Py_CHARMASK(*s++);
1506 if (islower(c)) {
1507 *s_new = toupper(c);
1508 } else
1509 *s_new = c;
1510 s_new++;
1511 }
1512 return new;
1513}
1514
1515
/* Docstring for the title() method. */
static char title__doc__[] =
    "S.title() -> string\n"
    "\n"
    "Return a titlecased version of S, i.e. words start with uppercase\n"
    "characters, all remaining cased characters have lowercase.";
1521
1522static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001523string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001524{
1525 char *s = PyString_AS_STRING(self), *s_new;
1526 int i, n = PyString_GET_SIZE(self);
1527 int previous_is_cased = 0;
1528 PyObject *new;
1529
Guido van Rossum4c08d552000-03-10 22:55:18 +00001530 new = PyString_FromStringAndSize(NULL, n);
1531 if (new == NULL)
1532 return NULL;
1533 s_new = PyString_AsString(new);
1534 for (i = 0; i < n; i++) {
1535 int c = Py_CHARMASK(*s++);
1536 if (islower(c)) {
1537 if (!previous_is_cased)
1538 c = toupper(c);
1539 previous_is_cased = 1;
1540 } else if (isupper(c)) {
1541 if (previous_is_cased)
1542 c = tolower(c);
1543 previous_is_cased = 1;
1544 } else
1545 previous_is_cased = 0;
1546 *s_new++ = c;
1547 }
1548 return new;
1549}
1550
/* Docstring for the capitalize() method. */
static char capitalize__doc__[] =
    "S.capitalize() -> string\n"
    "\n"
    "Return a copy of the string S with only its first character\n"
    "capitalized.";
1556
1557static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001558string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001559{
1560 char *s = PyString_AS_STRING(self), *s_new;
1561 int i, n = PyString_GET_SIZE(self);
1562 PyObject *new;
1563
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564 new = PyString_FromStringAndSize(NULL, n);
1565 if (new == NULL)
1566 return NULL;
1567 s_new = PyString_AsString(new);
1568 if (0 < n) {
1569 int c = Py_CHARMASK(*s++);
1570 if (islower(c))
1571 *s_new = toupper(c);
1572 else
1573 *s_new = c;
1574 s_new++;
1575 }
1576 for (i = 1; i < n; i++) {
1577 int c = Py_CHARMASK(*s++);
1578 if (isupper(c))
1579 *s_new = tolower(c);
1580 else
1581 *s_new = c;
1582 s_new++;
1583 }
1584 return new;
1585}
1586
1587
/* Docstring for the count() method. */
static char count__doc__[] =
    "S.count(sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "S[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
1594
1595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001596string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001598 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 int len = PyString_GET_SIZE(self), n;
1600 int i = 0, last = INT_MAX;
1601 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001602 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603
Guido van Rossumc6821402000-05-08 14:08:05 +00001604 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1605 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001606 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001607
Guido van Rossum4c08d552000-03-10 22:55:18 +00001608 if (PyString_Check(subobj)) {
1609 sub = PyString_AS_STRING(subobj);
1610 n = PyString_GET_SIZE(subobj);
1611 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001612#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001613 else if (PyUnicode_Check(subobj)) {
1614 int count;
1615 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1616 if (count == -1)
1617 return NULL;
1618 else
1619 return PyInt_FromLong((long) count);
1620 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001621#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1623 return NULL;
1624
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625 if (last > len)
1626 last = len;
1627 if (last < 0)
1628 last += len;
1629 if (last < 0)
1630 last = 0;
1631 if (i < 0)
1632 i += len;
1633 if (i < 0)
1634 i = 0;
1635 m = last + 1 - n;
1636 if (n == 0)
1637 return PyInt_FromLong((long) (m-i));
1638
1639 r = 0;
1640 while (i < m) {
1641 if (!memcmp(s+i, sub, n)) {
1642 r++;
1643 i += n;
1644 } else {
1645 i++;
1646 }
1647 }
1648 return PyInt_FromLong((long) r);
1649}
1650
1651
1652static char swapcase__doc__[] =
1653"S.swapcase() -> string\n\
1654\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001655Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656converted to lowercase and vice versa.";
1657
1658static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001659string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001660{
1661 char *s = PyString_AS_STRING(self), *s_new;
1662 int i, n = PyString_GET_SIZE(self);
1663 PyObject *new;
1664
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665 new = PyString_FromStringAndSize(NULL, n);
1666 if (new == NULL)
1667 return NULL;
1668 s_new = PyString_AsString(new);
1669 for (i = 0; i < n; i++) {
1670 int c = Py_CHARMASK(*s++);
1671 if (islower(c)) {
1672 *s_new = toupper(c);
1673 }
1674 else if (isupper(c)) {
1675 *s_new = tolower(c);
1676 }
1677 else
1678 *s_new = c;
1679 s_new++;
1680 }
1681 return new;
1682}
1683
1684
1685static char translate__doc__[] =
1686"S.translate(table [,deletechars]) -> string\n\
1687\n\
1688Return a copy of the string S, where all characters occurring\n\
1689in the optional argument deletechars are removed, and the\n\
1690remaining characters have been mapped through the given\n\
1691translation table, which must be a string of length 256.";
1692
1693static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001694string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001696 register char *input, *output;
1697 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698 register int i, c, changed = 0;
1699 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001700 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001701 int inlen, tablen, dellen = 0;
1702 PyObject *result;
1703 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001704 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705
Guido van Rossum4c08d552000-03-10 22:55:18 +00001706 if (!PyArg_ParseTuple(args, "O|O:translate",
1707 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001708 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001709
1710 if (PyString_Check(tableobj)) {
1711 table1 = PyString_AS_STRING(tableobj);
1712 tablen = PyString_GET_SIZE(tableobj);
1713 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001714#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001715 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001716 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001717 parameter; instead a mapping to None will cause characters
1718 to be deleted. */
1719 if (delobj != NULL) {
1720 PyErr_SetString(PyExc_TypeError,
1721 "deletions are implemented differently for unicode");
1722 return NULL;
1723 }
1724 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1725 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001726#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001727 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001729
1730 if (delobj != NULL) {
1731 if (PyString_Check(delobj)) {
1732 del_table = PyString_AS_STRING(delobj);
1733 dellen = PyString_GET_SIZE(delobj);
1734 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001735#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001736 else if (PyUnicode_Check(delobj)) {
1737 PyErr_SetString(PyExc_TypeError,
1738 "deletions are implemented differently for unicode");
1739 return NULL;
1740 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001741#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001742 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1743 return NULL;
1744
1745 if (tablen != 256) {
1746 PyErr_SetString(PyExc_ValueError,
1747 "translation table must be 256 characters long");
1748 return NULL;
1749 }
1750 }
1751 else {
1752 del_table = NULL;
1753 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754 }
1755
1756 table = table1;
1757 inlen = PyString_Size(input_obj);
1758 result = PyString_FromStringAndSize((char *)NULL, inlen);
1759 if (result == NULL)
1760 return NULL;
1761 output_start = output = PyString_AsString(result);
1762 input = PyString_AsString(input_obj);
1763
1764 if (dellen == 0) {
1765 /* If no deletions are required, use faster code */
1766 for (i = inlen; --i >= 0; ) {
1767 c = Py_CHARMASK(*input++);
1768 if (Py_CHARMASK((*output++ = table[c])) != c)
1769 changed = 1;
1770 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001771 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772 return result;
1773 Py_DECREF(result);
1774 Py_INCREF(input_obj);
1775 return input_obj;
1776 }
1777
1778 for (i = 0; i < 256; i++)
1779 trans_table[i] = Py_CHARMASK(table[i]);
1780
1781 for (i = 0; i < dellen; i++)
1782 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1783
1784 for (i = inlen; --i >= 0; ) {
1785 c = Py_CHARMASK(*input++);
1786 if (trans_table[c] != -1)
1787 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1788 continue;
1789 changed = 1;
1790 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001791 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792 Py_DECREF(result);
1793 Py_INCREF(input_obj);
1794 return input_obj;
1795 }
1796 /* Fix the size of the resulting string */
1797 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1798 return NULL;
1799 return result;
1800}
1801
1802
1803/* What follows is used for implementing replace(). Perry Stoll. */
1804
/*
  mymemfind

  memory-block counterpart of strstr().

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  The result is the offset of that occurrence within
  MEM, or -1 when there is none (which is always the case when PAT is
  longer than MEM).
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Largest offset at which a whole copy of PAT still fits in MEM. */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		const char *candidate = mem + pos;

		/* Cheap first-byte test before the full comparison. */
		if (*candidate != pat[0])
			continue;
		if (memcmp(candidate, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
1830
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT inside MEM, scanning
   left to right.  For example "1111" contains "11" twice, and so
   does "11111".
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;
	int where;

	while (len >= 0 &&
	       (where = mymemfind(mem, len, pat, pat_len)) != -1) {
		/* Resume the search just past the match that was found. */
		const int consumed = where + pat_len;

		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
1854
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If STR is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here and returned; the caller must release it with PyMem_FREE().

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result too
       large to be described by an int length).
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Each replacement changes the length by delta.  Growing the
	   string can overflow int, which would leave the buffer allocated
	   below too small for the copies that follow; treat that as an
	   out-of-memory error.  Shrinking cannot underflow because the
	   nfound matches all fit inside the len input bytes. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;
	new_len = len + nfound * delta;

	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
1939
1940
1941static char replace__doc__[] =
1942"S.replace (old, new[, maxsplit]) -> string\n\
1943\n\
1944Return a copy of string S with all occurrences of substring\n\
1945old replaced by new. If the optional argument maxsplit is\n\
1946given, only the first maxsplit occurrences are replaced.";
1947
1948static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001949string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001951 const char *str = PyString_AS_STRING(self), *sub, *repl;
1952 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001953 const int len = PyString_GET_SIZE(self);
1954 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001955 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001957 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
Guido van Rossum4c08d552000-03-10 22:55:18 +00001959 if (!PyArg_ParseTuple(args, "OO|i:replace",
1960 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962
1963 if (PyString_Check(subobj)) {
1964 sub = PyString_AS_STRING(subobj);
1965 sub_len = PyString_GET_SIZE(subobj);
1966 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001967#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001968 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001969 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001970 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001971#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001972 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1973 return NULL;
1974
1975 if (PyString_Check(replobj)) {
1976 repl = PyString_AS_STRING(replobj);
1977 repl_len = PyString_GET_SIZE(replobj);
1978 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001979#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001980 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001981 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001982 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001983#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001984 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1985 return NULL;
1986
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001987 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001988 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989 return NULL;
1990 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001991 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992 if (new_s == NULL) {
1993 PyErr_NoMemory();
1994 return NULL;
1995 }
1996 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001997 if (PyString_CheckExact(self)) {
1998 /* we're returning another reference to self */
1999 new = (PyObject*)self;
2000 Py_INCREF(new);
2001 }
2002 else {
2003 new = PyString_FromStringAndSize(str, len);
2004 if (new == NULL)
2005 return NULL;
2006 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007 }
2008 else {
2009 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002010 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011 }
2012 return new;
2013}
2014
2015
2016static char startswith__doc__[] =
2017"S.startswith(prefix[, start[, end]]) -> int\n\
2018\n\
2019Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2020optional start, test S beginning at that position. With optional end, stop\n\
2021comparing S at that position.";
2022
2023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002024string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002026 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002028 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 int plen;
2030 int start = 0;
2031 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033
Guido van Rossumc6821402000-05-08 14:08:05 +00002034 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2035 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002036 return NULL;
2037 if (PyString_Check(subobj)) {
2038 prefix = PyString_AS_STRING(subobj);
2039 plen = PyString_GET_SIZE(subobj);
2040 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002041#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002042 else if (PyUnicode_Check(subobj)) {
2043 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002044 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002045 subobj, start, end, -1);
2046 if (rc == -1)
2047 return NULL;
2048 else
2049 return PyInt_FromLong((long) rc);
2050 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002051#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053 return NULL;
2054
2055 /* adopt Java semantics for index out of range. it is legal for
2056 * offset to be == plen, but this only returns true if prefix is
2057 * the empty string.
2058 */
2059 if (start < 0 || start+plen > len)
2060 return PyInt_FromLong(0);
2061
2062 if (!memcmp(str+start, prefix, plen)) {
2063 /* did the match end after the specified end? */
2064 if (end < 0)
2065 return PyInt_FromLong(1);
2066 else if (end - start < plen)
2067 return PyInt_FromLong(0);
2068 else
2069 return PyInt_FromLong(1);
2070 }
2071 else return PyInt_FromLong(0);
2072}
2073
2074
2075static char endswith__doc__[] =
2076"S.endswith(suffix[, start[, end]]) -> int\n\
2077\n\
2078Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2079optional start, test S beginning at that position. With optional end, stop\n\
2080comparing S at that position.";
2081
2082static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002083string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002087 const char* suffix;
2088 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089 int start = 0;
2090 int end = -1;
2091 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093
Guido van Rossumc6821402000-05-08 14:08:05 +00002094 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2095 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 return NULL;
2097 if (PyString_Check(subobj)) {
2098 suffix = PyString_AS_STRING(subobj);
2099 slen = PyString_GET_SIZE(subobj);
2100 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002101#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002102 else if (PyUnicode_Check(subobj)) {
2103 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002104 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002105 subobj, start, end, +1);
2106 if (rc == -1)
2107 return NULL;
2108 else
2109 return PyInt_FromLong((long) rc);
2110 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002111#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002112 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113 return NULL;
2114
Guido van Rossum4c08d552000-03-10 22:55:18 +00002115 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116 return PyInt_FromLong(0);
2117
2118 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002119 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120
Guido van Rossum4c08d552000-03-10 22:55:18 +00002121 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122 return PyInt_FromLong(1);
2123 else return PyInt_FromLong(0);
2124}
2125
2126
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002127static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002128"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002129\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002130Encodes S using the codec registered for encoding. encoding defaults\n\
2131to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002132handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2133a ValueError. Other possible values are 'ignore' and 'replace'.";
2134
2135static PyObject *
2136string_encode(PyStringObject *self, PyObject *args)
2137{
2138 char *encoding = NULL;
2139 char *errors = NULL;
2140 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2141 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002142 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2143}
2144
2145
2146static char decode__doc__[] =
2147"S.decode([encoding[,errors]]) -> object\n\
2148\n\
2149Decodes S using the codec registered for encoding. encoding defaults\n\
2150to the default encoding. errors may be given to set a different error\n\
2151handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2152a ValueError. Other possible values are 'ignore' and 'replace'.";
2153
2154static PyObject *
2155string_decode(PyStringObject *self, PyObject *args)
2156{
2157 char *encoding = NULL;
2158 char *errors = NULL;
2159 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2160 return NULL;
2161 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002162}
2163
2164
Guido van Rossum4c08d552000-03-10 22:55:18 +00002165static char expandtabs__doc__[] =
2166"S.expandtabs([tabsize]) -> string\n\
2167\n\
2168Return a copy of S where all tab characters are expanded using spaces.\n\
2169If tabsize is not given, a tab size of 8 characters is assumed.";
2170
2171static PyObject*
2172string_expandtabs(PyStringObject *self, PyObject *args)
2173{
2174 const char *e, *p;
2175 char *q;
2176 int i, j;
2177 PyObject *u;
2178 int tabsize = 8;
2179
2180 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2181 return NULL;
2182
Thomas Wouters7e474022000-07-16 12:04:32 +00002183 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184 i = j = 0;
2185 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2186 for (p = PyString_AS_STRING(self); p < e; p++)
2187 if (*p == '\t') {
2188 if (tabsize > 0)
2189 j += tabsize - (j % tabsize);
2190 }
2191 else {
2192 j++;
2193 if (*p == '\n' || *p == '\r') {
2194 i += j;
2195 j = 0;
2196 }
2197 }
2198
2199 /* Second pass: create output string and fill it */
2200 u = PyString_FromStringAndSize(NULL, i + j);
2201 if (!u)
2202 return NULL;
2203
2204 j = 0;
2205 q = PyString_AS_STRING(u);
2206
2207 for (p = PyString_AS_STRING(self); p < e; p++)
2208 if (*p == '\t') {
2209 if (tabsize > 0) {
2210 i = tabsize - (j % tabsize);
2211 j += i;
2212 while (i--)
2213 *q++ = ' ';
2214 }
2215 }
2216 else {
2217 j++;
2218 *q++ = *p;
2219 if (*p == '\n' || *p == '\r')
2220 j = 0;
2221 }
2222
2223 return u;
2224}
2225
Tim Peters8fa5dd02001-09-12 02:18:30 +00002226static PyObject *
2227pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228{
2229 PyObject *u;
2230
2231 if (left < 0)
2232 left = 0;
2233 if (right < 0)
2234 right = 0;
2235
Tim Peters8fa5dd02001-09-12 02:18:30 +00002236 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002237 Py_INCREF(self);
2238 return (PyObject *)self;
2239 }
2240
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002241 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242 left + PyString_GET_SIZE(self) + right);
2243 if (u) {
2244 if (left)
2245 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002246 memcpy(PyString_AS_STRING(u) + left,
2247 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248 PyString_GET_SIZE(self));
2249 if (right)
2250 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2251 fill, right);
2252 }
2253
2254 return u;
2255}
2256
2257static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002258"S.ljust(width) -> string\n"
2259"\n"
2260"Return S left justified in a string of length width. Padding is\n"
2261"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262
2263static PyObject *
2264string_ljust(PyStringObject *self, PyObject *args)
2265{
2266 int width;
2267 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2268 return NULL;
2269
Tim Peters8fa5dd02001-09-12 02:18:30 +00002270 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002271 Py_INCREF(self);
2272 return (PyObject*) self;
2273 }
2274
2275 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2276}
2277
2278
2279static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002280"S.rjust(width) -> string\n"
2281"\n"
2282"Return S right justified in a string of length width. Padding is\n"
2283"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284
2285static PyObject *
2286string_rjust(PyStringObject *self, PyObject *args)
2287{
2288 int width;
2289 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2290 return NULL;
2291
Tim Peters8fa5dd02001-09-12 02:18:30 +00002292 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 Py_INCREF(self);
2294 return (PyObject*) self;
2295 }
2296
2297 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2298}
2299
2300
2301static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002302"S.center(width) -> string\n"
2303"\n"
2304"Return S centered in a string of length width. Padding is done\n"
2305"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306
2307static PyObject *
2308string_center(PyStringObject *self, PyObject *args)
2309{
2310 int marg, left;
2311 int width;
2312
2313 if (!PyArg_ParseTuple(args, "i:center", &width))
2314 return NULL;
2315
Tim Peters8fa5dd02001-09-12 02:18:30 +00002316 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 Py_INCREF(self);
2318 return (PyObject*) self;
2319 }
2320
2321 marg = width - PyString_GET_SIZE(self);
2322 left = marg / 2 + (marg & width & 1);
2323
2324 return pad(self, left, marg - left, ' ');
2325}
2326
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002328"S.isspace() -> int\n"
2329"\n"
2330"Return 1 if there are only whitespace characters in S,\n"
2331"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002332
2333static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002334string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002335{
Fred Drakeba096332000-07-09 07:04:36 +00002336 register const unsigned char *p
2337 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002338 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339
Guido van Rossum4c08d552000-03-10 22:55:18 +00002340 /* Shortcut for single character strings */
2341 if (PyString_GET_SIZE(self) == 1 &&
2342 isspace(*p))
2343 return PyInt_FromLong(1);
2344
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002345 /* Special case for empty strings */
2346 if (PyString_GET_SIZE(self) == 0)
2347 return PyInt_FromLong(0);
2348
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349 e = p + PyString_GET_SIZE(self);
2350 for (; p < e; p++) {
2351 if (!isspace(*p))
2352 return PyInt_FromLong(0);
2353 }
2354 return PyInt_FromLong(1);
2355}
2356
2357
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002358static char isalpha__doc__[] =
2359"S.isalpha() -> int\n\
2360\n\
2361Return 1 if all characters in S are alphabetic\n\
2362and there is at least one character in S, 0 otherwise.";
2363
2364static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002365string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002366{
Fred Drakeba096332000-07-09 07:04:36 +00002367 register const unsigned char *p
2368 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002369 register const unsigned char *e;
2370
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002371 /* Shortcut for single character strings */
2372 if (PyString_GET_SIZE(self) == 1 &&
2373 isalpha(*p))
2374 return PyInt_FromLong(1);
2375
2376 /* Special case for empty strings */
2377 if (PyString_GET_SIZE(self) == 0)
2378 return PyInt_FromLong(0);
2379
2380 e = p + PyString_GET_SIZE(self);
2381 for (; p < e; p++) {
2382 if (!isalpha(*p))
2383 return PyInt_FromLong(0);
2384 }
2385 return PyInt_FromLong(1);
2386}
2387
2388
2389static char isalnum__doc__[] =
2390"S.isalnum() -> int\n\
2391\n\
2392Return 1 if all characters in S are alphanumeric\n\
2393and there is at least one character in S, 0 otherwise.";
2394
2395static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002396string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002397{
Fred Drakeba096332000-07-09 07:04:36 +00002398 register const unsigned char *p
2399 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002400 register const unsigned char *e;
2401
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002402 /* Shortcut for single character strings */
2403 if (PyString_GET_SIZE(self) == 1 &&
2404 isalnum(*p))
2405 return PyInt_FromLong(1);
2406
2407 /* Special case for empty strings */
2408 if (PyString_GET_SIZE(self) == 0)
2409 return PyInt_FromLong(0);
2410
2411 e = p + PyString_GET_SIZE(self);
2412 for (; p < e; p++) {
2413 if (!isalnum(*p))
2414 return PyInt_FromLong(0);
2415 }
2416 return PyInt_FromLong(1);
2417}
2418
2419
Guido van Rossum4c08d552000-03-10 22:55:18 +00002420static char isdigit__doc__[] =
2421"S.isdigit() -> int\n\
2422\n\
2423Return 1 if there are only digit characters in S,\n\
24240 otherwise.";
2425
2426static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002427string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428{
Fred Drakeba096332000-07-09 07:04:36 +00002429 register const unsigned char *p
2430 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002431 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 /* Shortcut for single character strings */
2434 if (PyString_GET_SIZE(self) == 1 &&
2435 isdigit(*p))
2436 return PyInt_FromLong(1);
2437
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002438 /* Special case for empty strings */
2439 if (PyString_GET_SIZE(self) == 0)
2440 return PyInt_FromLong(0);
2441
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 e = p + PyString_GET_SIZE(self);
2443 for (; p < e; p++) {
2444 if (!isdigit(*p))
2445 return PyInt_FromLong(0);
2446 }
2447 return PyInt_FromLong(1);
2448}
2449
2450
2451static char islower__doc__[] =
2452"S.islower() -> int\n\
2453\n\
2454Return 1 if all cased characters in S are lowercase and there is\n\
2455at least one cased character in S, 0 otherwise.";
2456
2457static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002458string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002459{
Fred Drakeba096332000-07-09 07:04:36 +00002460 register const unsigned char *p
2461 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002462 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002463 int cased;
2464
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 /* Shortcut for single character strings */
2466 if (PyString_GET_SIZE(self) == 1)
2467 return PyInt_FromLong(islower(*p) != 0);
2468
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002469 /* Special case for empty strings */
2470 if (PyString_GET_SIZE(self) == 0)
2471 return PyInt_FromLong(0);
2472
Guido van Rossum4c08d552000-03-10 22:55:18 +00002473 e = p + PyString_GET_SIZE(self);
2474 cased = 0;
2475 for (; p < e; p++) {
2476 if (isupper(*p))
2477 return PyInt_FromLong(0);
2478 else if (!cased && islower(*p))
2479 cased = 1;
2480 }
2481 return PyInt_FromLong(cased);
2482}
2483
2484
2485static char isupper__doc__[] =
2486"S.isupper() -> int\n\
2487\n\
2488Return 1 if all cased characters in S are uppercase and there is\n\
2489at least one cased character in S, 0 otherwise.";
2490
2491static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002492string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002493{
Fred Drakeba096332000-07-09 07:04:36 +00002494 register const unsigned char *p
2495 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002496 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002497 int cased;
2498
Guido van Rossum4c08d552000-03-10 22:55:18 +00002499 /* Shortcut for single character strings */
2500 if (PyString_GET_SIZE(self) == 1)
2501 return PyInt_FromLong(isupper(*p) != 0);
2502
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002503 /* Special case for empty strings */
2504 if (PyString_GET_SIZE(self) == 0)
2505 return PyInt_FromLong(0);
2506
Guido van Rossum4c08d552000-03-10 22:55:18 +00002507 e = p + PyString_GET_SIZE(self);
2508 cased = 0;
2509 for (; p < e; p++) {
2510 if (islower(*p))
2511 return PyInt_FromLong(0);
2512 else if (!cased && isupper(*p))
2513 cased = 1;
2514 }
2515 return PyInt_FromLong(cased);
2516}
2517
2518
2519static char istitle__doc__[] =
2520"S.istitle() -> int\n\
2521\n\
2522Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2523may only follow uncased characters and lowercase characters only cased\n\
2524ones. Return 0 otherwise.";
2525
2526static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002527string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528{
Fred Drakeba096332000-07-09 07:04:36 +00002529 register const unsigned char *p
2530 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002531 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 int cased, previous_is_cased;
2533
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534 /* Shortcut for single character strings */
2535 if (PyString_GET_SIZE(self) == 1)
2536 return PyInt_FromLong(isupper(*p) != 0);
2537
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002538 /* Special case for empty strings */
2539 if (PyString_GET_SIZE(self) == 0)
2540 return PyInt_FromLong(0);
2541
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 e = p + PyString_GET_SIZE(self);
2543 cased = 0;
2544 previous_is_cased = 0;
2545 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002546 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547
2548 if (isupper(ch)) {
2549 if (previous_is_cased)
2550 return PyInt_FromLong(0);
2551 previous_is_cased = 1;
2552 cased = 1;
2553 }
2554 else if (islower(ch)) {
2555 if (!previous_is_cased)
2556 return PyInt_FromLong(0);
2557 previous_is_cased = 1;
2558 cased = 1;
2559 }
2560 else
2561 previous_is_cased = 0;
2562 }
2563 return PyInt_FromLong(cased);
2564}
2565
2566
2567static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002568"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569\n\
2570Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002571Line breaks are not included in the resulting list unless keepends\n\
2572is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573
2574#define SPLIT_APPEND(data, left, right) \
2575 str = PyString_FromStringAndSize(data + left, right - left); \
2576 if (!str) \
2577 goto onError; \
2578 if (PyList_Append(list, str)) { \
2579 Py_DECREF(str); \
2580 goto onError; \
2581 } \
2582 else \
2583 Py_DECREF(str);
2584
2585static PyObject*
2586string_splitlines(PyStringObject *self, PyObject *args)
2587{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002588 register int i;
2589 register int j;
2590 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002591 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002592 PyObject *list;
2593 PyObject *str;
2594 char *data;
2595
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002596 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002597 return NULL;
2598
2599 data = PyString_AS_STRING(self);
2600 len = PyString_GET_SIZE(self);
2601
Guido van Rossum4c08d552000-03-10 22:55:18 +00002602 list = PyList_New(0);
2603 if (!list)
2604 goto onError;
2605
2606 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002607 int eol;
2608
Guido van Rossum4c08d552000-03-10 22:55:18 +00002609 /* Find a line and append it */
2610 while (i < len && data[i] != '\n' && data[i] != '\r')
2611 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002612
2613 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002614 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002615 if (i < len) {
2616 if (data[i] == '\r' && i + 1 < len &&
2617 data[i+1] == '\n')
2618 i += 2;
2619 else
2620 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002621 if (keepends)
2622 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002623 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002624 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002625 j = i;
2626 }
2627 if (j < len) {
2628 SPLIT_APPEND(data, j, len);
2629 }
2630
2631 return list;
2632
2633 onError:
2634 Py_DECREF(list);
2635 return NULL;
2636}
2637
2638#undef SPLIT_APPEND
2639
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002640
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002641static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002643 /* Counterparts of the obsolete stropmodule functions; except
2644 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002645 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2646 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2647 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2648 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2649 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2650 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2651 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2652 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2653 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2654 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2655 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2656 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2657 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2658 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2659 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2660 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2661 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2662 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2663 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2664 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2665 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2666 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2667 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2668 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2669 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2670 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2671 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2672 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2673 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2674 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2675 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2676 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2677 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002678#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002679 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002680#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002681 {NULL, NULL} /* sentinel */
2682};
2683
Guido van Rossumae960af2001-08-30 03:11:59 +00002684staticforward PyObject *
2685str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2686
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002687static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002688string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002689{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002690 PyObject *x = NULL;
2691 static char *kwlist[] = {"object", 0};
2692
Guido van Rossumae960af2001-08-30 03:11:59 +00002693 if (type != &PyString_Type)
2694 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002695 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2696 return NULL;
2697 if (x == NULL)
2698 return PyString_FromString("");
2699 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002700}
2701
Guido van Rossumae960af2001-08-30 03:11:59 +00002702static PyObject *
2703str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2704{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002705 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002706 int n;
2707
2708 assert(PyType_IsSubtype(type, &PyString_Type));
2709 tmp = string_new(&PyString_Type, args, kwds);
2710 if (tmp == NULL)
2711 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002712 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002713 n = PyString_GET_SIZE(tmp);
2714 pnew = type->tp_alloc(type, n);
2715 if (pnew != NULL) {
2716 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2717#ifdef CACHE_HASH
2718 ((PyStringObject *)pnew)->ob_shash =
2719 ((PyStringObject *)tmp)->ob_shash;
2720#endif
2721#ifdef INTERN_STRINGS
2722 ((PyStringObject *)pnew)->ob_sinterned =
2723 ((PyStringObject *)tmp)->ob_sinterned;
2724#endif
2725 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002726 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002727 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002728}
2729
Tim Peters6d6c1a32001-08-02 04:15:00 +00002730static char string_doc[] =
2731"str(object) -> string\n\
2732\n\
2733Return a nice string representation of the object.\n\
2734If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002735
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002736PyTypeObject PyString_Type = {
2737 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002738 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002739 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002740 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002741 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002742 (destructor)string_dealloc, /* tp_dealloc */
2743 (printfunc)string_print, /* tp_print */
2744 0, /* tp_getattr */
2745 0, /* tp_setattr */
2746 0, /* tp_compare */
2747 (reprfunc)string_repr, /* tp_repr */
2748 0, /* tp_as_number */
2749 &string_as_sequence, /* tp_as_sequence */
2750 0, /* tp_as_mapping */
2751 (hashfunc)string_hash, /* tp_hash */
2752 0, /* tp_call */
2753 (reprfunc)string_str, /* tp_str */
2754 PyObject_GenericGetAttr, /* tp_getattro */
2755 0, /* tp_setattro */
2756 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002757 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002758 string_doc, /* tp_doc */
2759 0, /* tp_traverse */
2760 0, /* tp_clear */
2761 (richcmpfunc)string_richcompare, /* tp_richcompare */
2762 0, /* tp_weaklistoffset */
2763 0, /* tp_iter */
2764 0, /* tp_iternext */
2765 string_methods, /* tp_methods */
2766 0, /* tp_members */
2767 0, /* tp_getset */
2768 0, /* tp_base */
2769 0, /* tp_dict */
2770 0, /* tp_descr_get */
2771 0, /* tp_descr_set */
2772 0, /* tp_dictoffset */
2773 0, /* tp_init */
2774 0, /* tp_alloc */
2775 string_new, /* tp_new */
Guido van Rossum9475a232001-10-05 20:51:39 +00002776 _PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002777};
2778
2779void
Fred Drakeba096332000-07-09 07:04:36 +00002780PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002781{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002782 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002783 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002784 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002785 if (w == NULL || !PyString_Check(*pv)) {
2786 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002787 *pv = NULL;
2788 return;
2789 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 v = string_concat((PyStringObject *) *pv, w);
2791 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002792 *pv = v;
2793}
2794
Guido van Rossum013142a1994-08-30 08:19:36 +00002795void
Fred Drakeba096332000-07-09 07:04:36 +00002796PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002797{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 PyString_Concat(pv, w);
2799 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002800}
2801
2802
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803/* The following function breaks the notion that strings are immutable:
2804 it changes the size of a string. We get away with this only if there
2805 is only one module referencing the object. You can also think of it
2806 as creating a new string object and destroying the old one, only
2807 more efficiently. In any case, don't use this if the string may
2808 already be known to some other part of the code... */
2809
2810int
Fred Drakeba096332000-07-09 07:04:36 +00002811_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002812{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002813 register PyObject *v;
2814 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002815 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002816 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002817 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818 Py_DECREF(v);
2819 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002820 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002821 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002822 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002823#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002824 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002825#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002826 _Py_ForgetReference(v);
2827 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002828 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002829 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002830 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002831 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002832 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002833 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002834 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002835 _Py_NewReference(*pv);
2836 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002837 sv->ob_size = newsize;
2838 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002839 return 0;
2840}
Guido van Rossume5372401993-03-16 12:15:04 +00002841
2842/* Helpers for formatstring */
2843
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002844static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002845getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002846{
2847 int argidx = *p_argidx;
2848 if (argidx < arglen) {
2849 (*p_argidx)++;
2850 if (arglen < 0)
2851 return args;
2852 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002853 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002854 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002855 PyErr_SetString(PyExc_TypeError,
2856 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002857 return NULL;
2858}
2859
Tim Peters38fd5b62000-09-21 05:43:11 +00002860/* Format codes
2861 * F_LJUST '-'
2862 * F_SIGN '+'
2863 * F_BLANK ' '
2864 * F_ALT '#'
2865 * F_ZERO '0'
2866 */
Guido van Rossume5372401993-03-16 12:15:04 +00002867#define F_LJUST (1<<0)
2868#define F_SIGN (1<<1)
2869#define F_BLANK (1<<2)
2870#define F_ALT (1<<3)
2871#define F_ZERO (1<<4)
2872
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002873static int
Fred Drakeba096332000-07-09 07:04:36 +00002874formatfloat(char *buf, size_t buflen, int flags,
2875 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002876{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002877 /* fmt = '%#.' + `prec` + `type`
2878 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002879 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002880 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002881 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002882 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002883 if (prec < 0)
2884 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002885 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2886 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002887 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2888 (flags&F_ALT) ? "#" : "",
2889 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002890 /* worst case length calc to ensure no buffer overrun:
2891 fmt = %#.<prec>g
2892 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002893 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002894 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2895 If prec=0 the effective precision is 1 (the leading digit is
2896 always given), therefore increase by one to 10+prec. */
2897 if (buflen <= (size_t)10 + (size_t)prec) {
2898 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002899 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002900 return -1;
2901 }
Tim Peters885d4572001-11-28 20:27:42 +00002902 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002903 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002904}
2905
Tim Peters38fd5b62000-09-21 05:43:11 +00002906/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2907 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2908 * Python's regular ints.
2909 * Return value: a new PyString*, or NULL if error.
2910 * . *pbuf is set to point into it,
2911 * *plen set to the # of chars following that.
2912 * Caller must decref it when done using pbuf.
2913 * The string starting at *pbuf is of the form
2914 * "-"? ("0x" | "0X")? digit+
2915 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002916 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002917 * There will be at least prec digits, zero-filled on the left if
2918 * necessary to get that many.
2919 * val object to be converted
2920 * flags bitmask of format flags; only F_ALT is looked at
2921 * prec minimum number of digits; 0-fill on left if needed
2922 * type a character in [duoxX]; u acts the same as d
2923 *
2924 * CAUTION: o, x and X conversions on regular ints can never
2925 * produce a '-' sign, but can for Python's unbounded ints.
2926 */
2927PyObject*
2928_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2929 char **pbuf, int *plen)
2930{
2931 PyObject *result = NULL;
2932 char *buf;
2933 int i;
2934 int sign; /* 1 if '-', else 0 */
2935 int len; /* number of characters */
2936 int numdigits; /* len == numnondigits + numdigits */
2937 int numnondigits = 0;
2938
2939 switch (type) {
2940 case 'd':
2941 case 'u':
2942 result = val->ob_type->tp_str(val);
2943 break;
2944 case 'o':
2945 result = val->ob_type->tp_as_number->nb_oct(val);
2946 break;
2947 case 'x':
2948 case 'X':
2949 numnondigits = 2;
2950 result = val->ob_type->tp_as_number->nb_hex(val);
2951 break;
2952 default:
2953 assert(!"'type' not in [duoxX]");
2954 }
2955 if (!result)
2956 return NULL;
2957
2958 /* To modify the string in-place, there can only be one reference. */
2959 if (result->ob_refcnt != 1) {
2960 PyErr_BadInternalCall();
2961 return NULL;
2962 }
2963 buf = PyString_AsString(result);
2964 len = PyString_Size(result);
2965 if (buf[len-1] == 'L') {
2966 --len;
2967 buf[len] = '\0';
2968 }
2969 sign = buf[0] == '-';
2970 numnondigits += sign;
2971 numdigits = len - numnondigits;
2972 assert(numdigits > 0);
2973
Tim Petersfff53252001-04-12 18:38:48 +00002974 /* Get rid of base marker unless F_ALT */
2975 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002976 /* Need to skip 0x, 0X or 0. */
2977 int skipped = 0;
2978 switch (type) {
2979 case 'o':
2980 assert(buf[sign] == '0');
2981 /* If 0 is only digit, leave it alone. */
2982 if (numdigits > 1) {
2983 skipped = 1;
2984 --numdigits;
2985 }
2986 break;
2987 case 'x':
2988 case 'X':
2989 assert(buf[sign] == '0');
2990 assert(buf[sign + 1] == 'x');
2991 skipped = 2;
2992 numnondigits -= 2;
2993 break;
2994 }
2995 if (skipped) {
2996 buf += skipped;
2997 len -= skipped;
2998 if (sign)
2999 buf[0] = '-';
3000 }
3001 assert(len == numnondigits + numdigits);
3002 assert(numdigits > 0);
3003 }
3004
3005 /* Fill with leading zeroes to meet minimum width. */
3006 if (prec > numdigits) {
3007 PyObject *r1 = PyString_FromStringAndSize(NULL,
3008 numnondigits + prec);
3009 char *b1;
3010 if (!r1) {
3011 Py_DECREF(result);
3012 return NULL;
3013 }
3014 b1 = PyString_AS_STRING(r1);
3015 for (i = 0; i < numnondigits; ++i)
3016 *b1++ = *buf++;
3017 for (i = 0; i < prec - numdigits; i++)
3018 *b1++ = '0';
3019 for (i = 0; i < numdigits; i++)
3020 *b1++ = *buf++;
3021 *b1 = '\0';
3022 Py_DECREF(result);
3023 result = r1;
3024 buf = PyString_AS_STRING(result);
3025 len = numnondigits + prec;
3026 }
3027
3028 /* Fix up case for hex conversions. */
3029 switch (type) {
3030 case 'x':
3031 /* Need to convert all upper case letters to lower case. */
3032 for (i = 0; i < len; i++)
3033 if (buf[i] >= 'A' && buf[i] <= 'F')
3034 buf[i] += 'a'-'A';
3035 break;
3036 case 'X':
3037 /* Need to convert 0x to 0X (and -0x to -0X). */
3038 if (buf[sign + 1] == 'x')
3039 buf[sign + 1] = 'X';
3040 break;
3041 }
3042 *pbuf = buf;
3043 *plen = len;
3044 return result;
3045}
3046
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003047static int
Fred Drakeba096332000-07-09 07:04:36 +00003048formatint(char *buf, size_t buflen, int flags,
3049 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003050{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003051 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003052 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3053 + 1 + 1 = 24 */
3054 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003055 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003056 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003057 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003058 if (prec < 0)
3059 prec = 1;
Tim Peters885d4572001-11-28 20:27:42 +00003060 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3061 (flags&F_ALT) ? "#" : "",
3062 prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003063 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003064 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003065 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003066 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003067 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003068 return -1;
3069 }
Tim Peters885d4572001-11-28 20:27:42 +00003070 PyOS_snprintf(buf, buflen, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003071 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3072 * but we want it (for consistency with other %#x conversions, and
3073 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003074 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3075 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3076 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003077 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003078 if (x == 0 &&
3079 (flags & F_ALT) &&
3080 (type == 'x' || type == 'X') &&
3081 buf[1] != (char)type) /* this last always true under std C */
3082 {
Tim Petersfff53252001-04-12 18:38:48 +00003083 memmove(buf+2, buf, strlen(buf) + 1);
3084 buf[0] = '0';
3085 buf[1] = (char)type;
3086 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003087 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003088}
3089
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003090static int
Fred Drakeba096332000-07-09 07:04:36 +00003091formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003092{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003093 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003094 if (PyString_Check(v)) {
3095 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003096 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003097 }
3098 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003099 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003100 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003101 }
3102 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003103 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003104}
3105
Guido van Rossum013142a1994-08-30 08:19:36 +00003106
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesized so that the cast cannot combine with
   neighbouring operators at the point of use.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003116
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003117PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003118PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003119{
3120 char *fmt, *res;
3121 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003122 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003123 PyObject *result, *orig_args;
3124#ifdef Py_USING_UNICODE
3125 PyObject *v, *w;
3126#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003127 PyObject *dict = NULL;
3128 if (format == NULL || !PyString_Check(format) || args == NULL) {
3129 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003130 return NULL;
3131 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003132 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003133 fmt = PyString_AsString(format);
3134 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003135 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003136 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003137 if (result == NULL)
3138 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003139 res = PyString_AsString(result);
3140 if (PyTuple_Check(args)) {
3141 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003142 argidx = 0;
3143 }
3144 else {
3145 arglen = -1;
3146 argidx = -2;
3147 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003148 if (args->ob_type->tp_as_mapping)
3149 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003150 while (--fmtcnt >= 0) {
3151 if (*fmt != '%') {
3152 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003153 rescnt = fmtcnt + 100;
3154 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003155 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003156 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003157 res = PyString_AsString(result)
3158 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003159 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003160 }
3161 *res++ = *fmt++;
3162 }
3163 else {
3164 /* Got a format specifier */
3165 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003166 int width = -1;
3167 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003168 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003169 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003170 PyObject *v = NULL;
3171 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003172 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003173 int sign;
3174 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003175 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003176#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003177 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003178 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003179#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003180
Guido van Rossumda9c2711996-12-05 21:58:58 +00003181 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003182 if (*fmt == '(') {
3183 char *keystart;
3184 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003185 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003186 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003187
3188 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003189 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003190 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003191 goto error;
3192 }
3193 ++fmt;
3194 --fmtcnt;
3195 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003196 /* Skip over balanced parentheses */
3197 while (pcount > 0 && --fmtcnt >= 0) {
3198 if (*fmt == ')')
3199 --pcount;
3200 else if (*fmt == '(')
3201 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003202 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003203 }
3204 keylen = fmt - keystart - 1;
3205 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003206 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003207 "incomplete format key");
3208 goto error;
3209 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003210 key = PyString_FromStringAndSize(keystart,
3211 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003212 if (key == NULL)
3213 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003214 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003215 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003216 args_owned = 0;
3217 }
3218 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003219 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003220 if (args == NULL) {
3221 goto error;
3222 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003223 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003224 arglen = -1;
3225 argidx = -2;
3226 }
Guido van Rossume5372401993-03-16 12:15:04 +00003227 while (--fmtcnt >= 0) {
3228 switch (c = *fmt++) {
3229 case '-': flags |= F_LJUST; continue;
3230 case '+': flags |= F_SIGN; continue;
3231 case ' ': flags |= F_BLANK; continue;
3232 case '#': flags |= F_ALT; continue;
3233 case '0': flags |= F_ZERO; continue;
3234 }
3235 break;
3236 }
3237 if (c == '*') {
3238 v = getnextarg(args, arglen, &argidx);
3239 if (v == NULL)
3240 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003241 if (!PyInt_Check(v)) {
3242 PyErr_SetString(PyExc_TypeError,
3243 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003244 goto error;
3245 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003246 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003247 if (width < 0) {
3248 flags |= F_LJUST;
3249 width = -width;
3250 }
Guido van Rossume5372401993-03-16 12:15:04 +00003251 if (--fmtcnt >= 0)
3252 c = *fmt++;
3253 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003254 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003255 width = c - '0';
3256 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003257 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003258 if (!isdigit(c))
3259 break;
3260 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003261 PyErr_SetString(
3262 PyExc_ValueError,
3263 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003264 goto error;
3265 }
3266 width = width*10 + (c - '0');
3267 }
3268 }
3269 if (c == '.') {
3270 prec = 0;
3271 if (--fmtcnt >= 0)
3272 c = *fmt++;
3273 if (c == '*') {
3274 v = getnextarg(args, arglen, &argidx);
3275 if (v == NULL)
3276 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003277 if (!PyInt_Check(v)) {
3278 PyErr_SetString(
3279 PyExc_TypeError,
3280 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003281 goto error;
3282 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003283 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003284 if (prec < 0)
3285 prec = 0;
3286 if (--fmtcnt >= 0)
3287 c = *fmt++;
3288 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003289 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003290 prec = c - '0';
3291 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003292 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003293 if (!isdigit(c))
3294 break;
3295 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003296 PyErr_SetString(
3297 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003298 "prec too big");
3299 goto error;
3300 }
3301 prec = prec*10 + (c - '0');
3302 }
3303 }
3304 } /* prec */
3305 if (fmtcnt >= 0) {
3306 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003307 if (--fmtcnt >= 0)
3308 c = *fmt++;
3309 }
3310 }
3311 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003312 PyErr_SetString(PyExc_ValueError,
3313 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003314 goto error;
3315 }
3316 if (c != '%') {
3317 v = getnextarg(args, arglen, &argidx);
3318 if (v == NULL)
3319 goto error;
3320 }
3321 sign = 0;
3322 fill = ' ';
3323 switch (c) {
3324 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003325 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003326 len = 1;
3327 break;
3328 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003329 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003330#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003331 if (PyUnicode_Check(v)) {
3332 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003333 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003334 goto unicode;
3335 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003336#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003337 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003338 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003339 else
3340 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003341 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003342 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003343 if (!PyString_Check(temp)) {
3344 PyErr_SetString(PyExc_TypeError,
3345 "%s argument has non-string str()");
3346 goto error;
3347 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003348 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003349 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003350 if (prec >= 0 && len > prec)
3351 len = prec;
3352 break;
3353 case 'i':
3354 case 'd':
3355 case 'u':
3356 case 'o':
3357 case 'x':
3358 case 'X':
3359 if (c == 'i')
3360 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003361 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003362 temp = _PyString_FormatLong(v, flags,
3363 prec, c, &pbuf, &len);
3364 if (!temp)
3365 goto error;
3366 /* unbounded ints can always produce
3367 a sign character! */
3368 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003369 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003370 else {
3371 pbuf = formatbuf;
3372 len = formatint(pbuf, sizeof(formatbuf),
3373 flags, prec, c, v);
3374 if (len < 0)
3375 goto error;
3376 /* only d conversion is signed */
3377 sign = c == 'd';
3378 }
3379 if (flags & F_ZERO)
3380 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003381 break;
3382 case 'e':
3383 case 'E':
3384 case 'f':
3385 case 'g':
3386 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003387 pbuf = formatbuf;
3388 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003389 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003390 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003391 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003392 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003393 fill = '0';
3394 break;
3395 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003396 pbuf = formatbuf;
3397 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003398 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003399 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003400 break;
3401 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003402 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003403 "unsupported format character '%c' (0x%x) "
3404 "at index %i",
3405 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003406 goto error;
3407 }
3408 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003409 if (*pbuf == '-' || *pbuf == '+') {
3410 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003411 len--;
3412 }
3413 else if (flags & F_SIGN)
3414 sign = '+';
3415 else if (flags & F_BLANK)
3416 sign = ' ';
3417 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003418 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003419 }
3420 if (width < len)
3421 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003422 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003423 reslen -= rescnt;
3424 rescnt = width + fmtcnt + 100;
3425 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003426 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003427 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003428 res = PyString_AsString(result)
3429 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003430 }
3431 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003432 if (fill != ' ')
3433 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003434 rescnt--;
3435 if (width > len)
3436 width--;
3437 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003438 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3439 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003440 assert(pbuf[1] == c);
3441 if (fill != ' ') {
3442 *res++ = *pbuf++;
3443 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003444 }
Tim Petersfff53252001-04-12 18:38:48 +00003445 rescnt -= 2;
3446 width -= 2;
3447 if (width < 0)
3448 width = 0;
3449 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003450 }
3451 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003452 do {
3453 --rescnt;
3454 *res++ = fill;
3455 } while (--width > len);
3456 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003457 if (fill == ' ') {
3458 if (sign)
3459 *res++ = sign;
3460 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003461 (c == 'x' || c == 'X')) {
3462 assert(pbuf[0] == '0');
3463 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003464 *res++ = *pbuf++;
3465 *res++ = *pbuf++;
3466 }
3467 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003468 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003469 res += len;
3470 rescnt -= len;
3471 while (--width >= len) {
3472 --rescnt;
3473 *res++ = ' ';
3474 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003475 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003476 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003477 "not all arguments converted");
3478 goto error;
3479 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003480 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003481 } /* '%' */
3482 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003483 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003484 PyErr_SetString(PyExc_TypeError,
3485 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003486 goto error;
3487 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003488 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003489 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003490 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003491 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003492 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003493
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003494#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003495 unicode:
3496 if (args_owned) {
3497 Py_DECREF(args);
3498 args_owned = 0;
3499 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003500 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003501 if (PyTuple_Check(orig_args) && argidx > 0) {
3502 PyObject *v;
3503 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3504 v = PyTuple_New(n);
3505 if (v == NULL)
3506 goto error;
3507 while (--n >= 0) {
3508 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3509 Py_INCREF(w);
3510 PyTuple_SET_ITEM(v, n, w);
3511 }
3512 args = v;
3513 } else {
3514 Py_INCREF(orig_args);
3515 args = orig_args;
3516 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003517 args_owned = 1;
3518 /* Take what we have of the result and let the Unicode formatting
3519 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003520 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003521 if (_PyString_Resize(&result, rescnt))
3522 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003523 fmtcnt = PyString_GET_SIZE(format) - \
3524 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003525 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3526 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003527 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003528 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003529 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003530 if (v == NULL)
3531 goto error;
3532 /* Paste what we have (result) to what the Unicode formatting
3533 function returned (v) and return the result (or error) */
3534 w = PyUnicode_Concat(result, v);
3535 Py_DECREF(result);
3536 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003537 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003538 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003539#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003540
Guido van Rossume5372401993-03-16 12:15:04 +00003541 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003542 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003543 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003544 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003545 }
Guido van Rossume5372401993-03-16 12:15:04 +00003546 return NULL;
3547}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003548
3549
3550#ifdef INTERN_STRINGS
3551
Barry Warsaw4df762f2000-08-16 23:41:01 +00003552/* This dictionary will leak at PyString_Fini() time. That's acceptable
3553 * because PyString_Fini() specifically frees interned strings that are
3554 * only referenced by this dictionary. The CVS log entry for revision 2.45
3555 * says:
3556 *
3557 * Change the Fini function to only remove otherwise unreferenced
3558 * strings from the interned table. There are references in
3559 * hard-to-find static variables all over the interpreter, and it's not
3560 * worth trying to get rid of all those; but "uninterning" isn't fair
3561 * either and may cause subtle failures later -- so we have to keep them
3562 * in the interned table.
3563 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003564static PyObject *interned;
3565
3566void
Fred Drakeba096332000-07-09 07:04:36 +00003567PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003568{
3569 register PyStringObject *s = (PyStringObject *)(*p);
3570 PyObject *t;
3571 if (s == NULL || !PyString_Check(s))
3572 Py_FatalError("PyString_InternInPlace: strings only please!");
3573 if ((t = s->ob_sinterned) != NULL) {
3574 if (t == (PyObject *)s)
3575 return;
3576 Py_INCREF(t);
3577 *p = t;
3578 Py_DECREF(s);
3579 return;
3580 }
3581 if (interned == NULL) {
3582 interned = PyDict_New();
3583 if (interned == NULL)
3584 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003585 }
3586 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3587 Py_INCREF(t);
3588 *p = s->ob_sinterned = t;
3589 Py_DECREF(s);
3590 return;
3591 }
Tim Peters111f6092001-09-12 07:54:51 +00003592 /* Ensure that only true string objects appear in the intern dict,
3593 and as the value of ob_sinterned. */
3594 if (PyString_CheckExact(s)) {
3595 t = (PyObject *)s;
3596 if (PyDict_SetItem(interned, t, t) == 0) {
3597 s->ob_sinterned = t;
3598 return;
3599 }
3600 }
3601 else {
3602 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3603 PyString_GET_SIZE(s));
3604 if (t != NULL) {
3605 if (PyDict_SetItem(interned, t, t) == 0) {
3606 *p = s->ob_sinterned = t;
3607 Py_DECREF(s);
3608 return;
3609 }
3610 Py_DECREF(t);
3611 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003612 }
3613 PyErr_Clear();
3614}
3615
3616
3617PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003618PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003619{
3620 PyObject *s = PyString_FromString(cp);
3621 if (s == NULL)
3622 return NULL;
3623 PyString_InternInPlace(&s);
3624 return s;
3625}
3626
3627#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003628
3629void
Fred Drakeba096332000-07-09 07:04:36 +00003630PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003631{
3632 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003633 for (i = 0; i < UCHAR_MAX + 1; i++) {
3634 Py_XDECREF(characters[i]);
3635 characters[i] = NULL;
3636 }
3637#ifndef DONT_SHARE_SHORT_STRINGS
3638 Py_XDECREF(nullstring);
3639 nullstring = NULL;
3640#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003641#ifdef INTERN_STRINGS
3642 if (interned) {
3643 int pos, changed;
3644 PyObject *key, *value;
3645 do {
3646 changed = 0;
3647 pos = 0;
3648 while (PyDict_Next(interned, &pos, &key, &value)) {
3649 if (key->ob_refcnt == 2 && key == value) {
3650 PyDict_DelItem(interned, key);
3651 changed = 1;
3652 }
3653 }
3654 } while (changed);
3655 }
3656#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003657}
Barry Warsawa903ad982001-02-23 16:40:48 +00003658
3659#ifdef INTERN_STRINGS
3660void _Py_ReleaseInternedStrings(void)
3661{
3662 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003663 fprintf(stderr, "releasing interned strings\n");
3664 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003665 Py_DECREF(interned);
3666 interned = NULL;
3667 }
3668}
3669#endif /* INTERN_STRINGS */