blob: 657b20093b92c39445678405e7f7a3cab245846d [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
Martin v. Löwisd1327502001-12-02 18:09:41 +000022 PyString_FromStringAndSize() and PyString_FromString() try in certain cases
23 to share string objects. When the size of the string is zero, these
24 routines always return a pointer to the same string object; when the size
25 is one, they return a pointer to an already existing object if the contents
26 of the string is known. For PyString_FromString() this is always the case,
27 for PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
29
30 A common practice of allocating a string and then filling it in or changing
31 it must be done carefully. It is only allowed to change the contents of
32 the string if the object was gotten from PyString_FromStringAndSize() with
33 a NULL first argument, because in the future these routines may try to do
34 even more sharing of objects.
35
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000036 The string in the `str' parameter does not have to be null-character
37 terminated. (Therefore it is safe to construct a substring by using
38 `PyString_FromStringAndSize(origstring, substrlen)'.)
39
40 The parameter `size' denotes number of characters to allocate, not
41 counting the null terminating character. If the `str' argument is
   not NULL, then it points to a string of length `size'. For
43 PyString_FromString, this string must be null-terminated.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
45 The member `op->ob_size' denotes the number of bytes of data in the string,
46 not counting the null terminating character, and is therefore equal to the
47 `size' parameter.
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000049PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000050PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000051{
Tim Peters9e897f42001-05-09 07:37:07 +000052 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000053#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 if (size == 0 && (op = nullstring) != NULL) {
55#ifdef COUNT_ALLOCS
56 null_strings++;
57#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 Py_INCREF(op);
59 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000060 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 if (size == 1 && str != NULL &&
62 (op = characters[*str & UCHAR_MAX]) != NULL)
63 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064#ifdef COUNT_ALLOCS
65 one_strings++;
66#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 Py_INCREF(op);
68 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000070#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
72 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000074 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef CACHE_HASH
79 op->ob_shash = -1;
80#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000081#ifdef INTERN_STRINGS
82 op->ob_sinterned = NULL;
83#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000087#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000101#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000113 if (size > INT_MAX) {
114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000118#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 if (size == 0 && (op = nullstring) != NULL) {
120#ifdef COUNT_ALLOCS
121 null_strings++;
122#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 }
126 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
127#ifdef COUNT_ALLOCS
128 one_strings++;
129#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 Py_INCREF(op);
131 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000133#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000134
135 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000138 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000139 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000140 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141#ifdef CACHE_HASH
142 op->ob_shash = -1;
143#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000144#ifdef INTERN_STRINGS
145 op->ob_sinterned = NULL;
146#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000147 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000148#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000150 PyObject *t = (PyObject *)op;
151 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000152 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000153 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000154 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000155 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000156 PyObject *t = (PyObject *)op;
157 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000158 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000159 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000160 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000161 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000162#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000164}
165
Barry Warsawdadace02001-08-24 18:32:06 +0000166PyObject *
167PyString_FromFormatV(const char *format, va_list vargs)
168{
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000170 int n = 0;
171 const char* f;
172 char *s;
173 PyObject* string;
174
Tim Petersc15c4f12001-10-02 21:32:07 +0000175#ifdef VA_LIST_IS_ARRAY
176 memcpy(count, vargs, sizeof(va_list));
177#else
178 count = vargs;
179#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000180 /* step 1: figure out how large a buffer we need */
181 for (f = format; *f; f++) {
182 if (*f == '%') {
183 const char* p = f;
184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
185 ;
186
187 /* skip the 'l' in %ld, since it doesn't change the
188 width. although only %d is supported (see
189 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000190 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000191 if (*f == 'l' && *(f+1) == 'd')
192 ++f;
193
194 switch (*f) {
195 case 'c':
196 (void)va_arg(count, int);
197 /* fall through... */
198 case '%':
199 n++;
200 break;
201 case 'd': case 'i': case 'x':
202 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000206 n += 20;
207 break;
208 case 's':
209 s = va_arg(count, char*);
210 n += strlen(s);
211 break;
212 case 'p':
213 (void) va_arg(count, int);
214 /* maximum 64-bit pointer representation:
215 * 0xffffffffffffffff
216 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000218 */
219 n += 19;
220 break;
221 default:
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
228 n += strlen(p);
229 goto expand;
230 }
231 } else
232 n++;
233 }
234 expand:
235 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000238 string = PyString_FromStringAndSize(NULL, n);
239 if (!string)
240 return NULL;
241
242 s = PyString_AsString(string);
243
244 for (f = format; *f; f++) {
245 if (*f == '%') {
246 const char* p = f++;
247 int i, longflag = 0;
248 /* parse the width.precision part (we're only
249 interested in the precision value, if any) */
250 n = 0;
251 while (isdigit(Py_CHARMASK(*f)))
252 n = (n*10) + *f++ - '0';
253 if (*f == '.') {
254 f++;
255 n = 0;
256 while (isdigit(Py_CHARMASK(*f)))
257 n = (n*10) + *f++ - '0';
258 }
259 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
260 f++;
261 /* handle the long flag, but only for %ld. others
262 can be added when necessary. */
263 if (*f == 'l' && *(f+1) == 'd') {
264 longflag = 1;
265 ++f;
266 }
267
268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else
276 sprintf(s, "%d", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'i':
280 sprintf(s, "%i", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 'x':
284 sprintf(s, "%x", va_arg(vargs, int));
285 s += strlen(s);
286 break;
287 case 's':
288 p = va_arg(vargs, char*);
289 i = strlen(p);
290 if (n > 0 && i > n)
291 i = n;
292 memcpy(s, p, i);
293 s += i;
294 break;
295 case 'p':
296 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000297 /* %p is ill-defined: ensure leading 0x. */
298 if (s[1] == 'X')
299 s[1] = 'x';
300 else if (s[1] != 'x') {
301 memmove(s+2, s, strlen(s)+1);
302 s[0] = '0';
303 s[1] = 'x';
304 }
Barry Warsawdadace02001-08-24 18:32:06 +0000305 s += strlen(s);
306 break;
307 case '%':
308 *s++ = '%';
309 break;
310 default:
311 strcpy(s, p);
312 s += strlen(s);
313 goto end;
314 }
315 } else
316 *s++ = *f;
317 }
318
319 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000320 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000321 return string;
322}
323
324PyObject *
325PyString_FromFormat(const char *format, ...)
326{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000327 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000328 va_list vargs;
329
330#ifdef HAVE_STDARG_PROTOTYPES
331 va_start(vargs, format);
332#else
333 va_start(vargs);
334#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 ret = PyString_FromFormatV(format, vargs);
336 va_end(vargs);
337 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000338}
339
340
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000341PyObject *PyString_Decode(const char *s,
342 int size,
343 const char *encoding,
344 const char *errors)
345{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000346 PyObject *v, *str;
347
348 str = PyString_FromStringAndSize(s, size);
349 if (str == NULL)
350 return NULL;
351 v = PyString_AsDecodedString(str, encoding, errors);
352 Py_DECREF(str);
353 return v;
354}
355
356PyObject *PyString_AsDecodedObject(PyObject *str,
357 const char *encoding,
358 const char *errors)
359{
360 PyObject *v;
361
362 if (!PyString_Check(str)) {
363 PyErr_BadArgument();
364 goto onError;
365 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000366
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000367 if (encoding == NULL) {
368#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000369 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000370#else
371 PyErr_SetString(PyExc_ValueError, "no encoding specified");
372 goto onError;
373#endif
374 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375
376 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000377 v = PyCodec_Decode(str, encoding, errors);
378 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380
381 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000382
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000384 return NULL;
385}
386
387PyObject *PyString_AsDecodedString(PyObject *str,
388 const char *encoding,
389 const char *errors)
390{
391 PyObject *v;
392
393 v = PyString_AsDecodedObject(str, encoding, errors);
394 if (v == NULL)
395 goto onError;
396
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000397#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 /* Convert Unicode to a string using the default encoding */
399 if (PyUnicode_Check(v)) {
400 PyObject *temp = v;
401 v = PyUnicode_AsEncodedString(v, NULL, NULL);
402 Py_DECREF(temp);
403 if (v == NULL)
404 goto onError;
405 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000406#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000407 if (!PyString_Check(v)) {
408 PyErr_Format(PyExc_TypeError,
409 "decoder did not return a string object (type=%.400s)",
410 v->ob_type->tp_name);
411 Py_DECREF(v);
412 goto onError;
413 }
414
415 return v;
416
417 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 return NULL;
419}
420
421PyObject *PyString_Encode(const char *s,
422 int size,
423 const char *encoding,
424 const char *errors)
425{
426 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000427
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000428 str = PyString_FromStringAndSize(s, size);
429 if (str == NULL)
430 return NULL;
431 v = PyString_AsEncodedString(str, encoding, errors);
432 Py_DECREF(str);
433 return v;
434}
435
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000436PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 if (!PyString_Check(str)) {
443 PyErr_BadArgument();
444 goto onError;
445 }
446
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000447 if (encoding == NULL) {
448#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000449 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000450#else
451 PyErr_SetString(PyExc_ValueError, "no encoding specified");
452 goto onError;
453#endif
454 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455
456 /* Encode via the codec registry */
457 v = PyCodec_Encode(str, encoding, errors);
458 if (v == NULL)
459 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000460
461 return v;
462
463 onError:
464 return NULL;
465}
466
467PyObject *PyString_AsEncodedString(PyObject *str,
468 const char *encoding,
469 const char *errors)
470{
471 PyObject *v;
472
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000473 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474 if (v == NULL)
475 goto onError;
476
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000477#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 /* Convert Unicode to a string using the default encoding */
479 if (PyUnicode_Check(v)) {
480 PyObject *temp = v;
481 v = PyUnicode_AsEncodedString(v, NULL, NULL);
482 Py_DECREF(temp);
483 if (v == NULL)
484 goto onError;
485 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000486#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 if (!PyString_Check(v)) {
488 PyErr_Format(PyExc_TypeError,
489 "encoder did not return a string object (type=%.400s)",
490 v->ob_type->tp_name);
491 Py_DECREF(v);
492 goto onError;
493 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000494
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000495 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000496
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 onError:
498 return NULL;
499}
500
Guido van Rossum234f9421993-06-17 12:35:49 +0000501static void
Fred Drakeba096332000-07-09 07:04:36 +0000502string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000503{
Guido van Rossum9475a232001-10-05 20:51:39 +0000504 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000505}
506
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000507static int
508string_getsize(register PyObject *op)
509{
510 char *s;
511 int len;
512 if (PyString_AsStringAndSize(op, &s, &len))
513 return -1;
514 return len;
515}
516
517static /*const*/ char *
518string_getbuffer(register PyObject *op)
519{
520 char *s;
521 int len;
522 if (PyString_AsStringAndSize(op, &s, &len))
523 return NULL;
524 return s;
525}
526
Guido van Rossumd7047b31995-01-02 19:07:15 +0000527int
Fred Drakeba096332000-07-09 07:04:36 +0000528PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530 if (!PyString_Check(op))
531 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000532 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533}
534
535/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000536PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000537{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 if (!PyString_Check(op))
539 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000540 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000541}
542
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543int
544PyString_AsStringAndSize(register PyObject *obj,
545 register char **s,
546 register int *len)
547{
548 if (s == NULL) {
549 PyErr_BadInternalCall();
550 return -1;
551 }
552
553 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000554#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000555 if (PyUnicode_Check(obj)) {
556 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
557 if (obj == NULL)
558 return -1;
559 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000560 else
561#endif
562 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000563 PyErr_Format(PyExc_TypeError,
564 "expected string or Unicode object, "
565 "%.200s found", obj->ob_type->tp_name);
566 return -1;
567 }
568 }
569
570 *s = PyString_AS_STRING(obj);
571 if (len != NULL)
572 *len = PyString_GET_SIZE(obj);
573 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
574 PyErr_SetString(PyExc_TypeError,
575 "expected string without null bytes");
576 return -1;
577 }
578 return 0;
579}
580
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000581/* Methods */
582
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000583static int
Fred Drakeba096332000-07-09 07:04:36 +0000584string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585{
586 int i;
587 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000589
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000590 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000591 if (! PyString_CheckExact(op)) {
592 int ret;
593 /* A str subclass may have its own __str__ method. */
594 op = (PyStringObject *) PyObject_Str((PyObject *)op);
595 if (op == NULL)
596 return -1;
597 ret = string_print(op, fp, flags);
598 Py_DECREF(op);
599 return ret;
600 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000601 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000602 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000603 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000604 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000605
Thomas Wouters7e474022000-07-16 12:04:32 +0000606 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000607 quote = '\'';
608 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
609 quote = '"';
610
611 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000612 for (i = 0; i < op->ob_size; i++) {
613 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000614 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000616 else if (c == '\t')
617 fprintf(fp, "\\t");
618 else if (c == '\n')
619 fprintf(fp, "\\n");
620 else if (c == '\r')
621 fprintf(fp, "\\r");
622 else if (c < ' ' || c >= 0x7f)
623 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000625 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000626 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000627 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000628 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629}
630
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000631static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000632string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000633{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000634 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
635 PyObject *v;
636 if (newsize > INT_MAX) {
637 PyErr_SetString(PyExc_OverflowError,
638 "string is too large to make repr");
639 }
640 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000641 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000642 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 }
644 else {
645 register int i;
646 register char c;
647 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000648 int quote;
649
Thomas Wouters7e474022000-07-16 12:04:32 +0000650 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000651 quote = '\'';
652 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
653 quote = '"';
654
Tim Peters9161c8b2001-12-03 01:55:38 +0000655 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000656 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000657 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000658 /* There's at least enough room for a hex escape
659 and a closing quote. */
660 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000662 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000664 else if (c == '\t')
665 *p++ = '\\', *p++ = 't';
666 else if (c == '\n')
667 *p++ = '\\', *p++ = 'n';
668 else if (c == '\r')
669 *p++ = '\\', *p++ = 'r';
670 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000671 /* For performance, we don't want to call
672 PyOS_snprintf here (extra layers of
673 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000674 sprintf(p, "\\x%02x", c & 0xff);
675 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000676 }
677 else
678 *p++ = c;
679 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000680 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000681 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000682 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000683 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000684 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000685 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000686 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687}
688
Guido van Rossum189f1df2001-05-01 16:51:53 +0000689static PyObject *
690string_str(PyObject *s)
691{
Tim Petersc9933152001-10-16 20:18:24 +0000692 assert(PyString_Check(s));
693 if (PyString_CheckExact(s)) {
694 Py_INCREF(s);
695 return s;
696 }
697 else {
698 /* Subtype -- return genuine string with the same value. */
699 PyStringObject *t = (PyStringObject *) s;
700 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
701 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000702}
703
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704static int
Fred Drakeba096332000-07-09 07:04:36 +0000705string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706{
707 return a->ob_size;
708}
709
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000710static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000711string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712{
713 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000714 register PyStringObject *op;
715 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000716#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000717 if (PyUnicode_Check(bb))
718 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000719#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000720 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000721 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000722 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723 return NULL;
724 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000727 if ((a->ob_size == 0 || b->ob_size == 0) &&
728 PyString_CheckExact(a) && PyString_CheckExact(b)) {
729 if (a->ob_size == 0) {
730 Py_INCREF(bb);
731 return bb;
732 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733 Py_INCREF(a);
734 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000735 }
736 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000737 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000738 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000739 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000740 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000741 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000742 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000743#ifdef CACHE_HASH
744 op->ob_shash = -1;
745#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000746#ifdef INTERN_STRINGS
747 op->ob_sinterned = NULL;
748#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000749 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
750 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
751 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000753#undef b
754}
755
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000756static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000757string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000758{
759 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000760 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000761 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000762 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000763 if (n < 0)
764 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000765 /* watch out for overflows: the size can overflow int,
766 * and the # of bytes needed can overflow size_t
767 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000769 if (n && size / n != a->ob_size) {
770 PyErr_SetString(PyExc_OverflowError,
771 "repeated string is too long");
772 return NULL;
773 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000774 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000775 Py_INCREF(a);
776 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 }
Tim Peters8f422462000-09-09 06:13:41 +0000778 nbytes = size * sizeof(char);
779 if (nbytes / sizeof(char) != (size_t)size ||
780 nbytes + sizeof(PyStringObject) <= nbytes) {
781 PyErr_SetString(PyExc_OverflowError,
782 "repeated string is too long");
783 return NULL;
784 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000785 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000786 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000787 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000788 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000789 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000790#ifdef CACHE_HASH
791 op->ob_shash = -1;
792#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000793#ifdef INTERN_STRINGS
794 op->ob_sinterned = NULL;
795#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000796 for (i = 0; i < size; i += a->ob_size)
797 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
798 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000799 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
802/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
803
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000804static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000805string_slice(register PyStringObject *a, register int i, register int j)
806 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807{
808 if (i < 0)
809 i = 0;
810 if (j < 0)
811 j = 0; /* Avoid signed/unsigned bug in next line */
812 if (j > a->ob_size)
813 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000814 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
815 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000816 Py_INCREF(a);
817 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000818 }
819 if (j < i)
820 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000821 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822}
823
Guido van Rossum9284a572000-03-07 15:53:43 +0000824static int
Fred Drakeba096332000-07-09 07:04:36 +0000825string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000826{
827 register char *s, *end;
828 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000829#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000830 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000831 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000832#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000833 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000834 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000835 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000836 return -1;
837 }
838 c = PyString_AsString(el)[0];
839 s = PyString_AsString(a);
840 end = s + PyString_Size(a);
841 while (s < end) {
842 if (c == *s++)
843 return 1;
844 }
845 return 0;
846}
847
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000848static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000849string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000851 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000852 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000854 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 return NULL;
856 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000857 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000858 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000859 if (v == NULL)
860 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000861 else {
862#ifdef COUNT_ALLOCS
863 one_strings++;
864#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000865 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000866 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000867 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000868}
869
Martin v. Löwiscd353062001-05-24 16:56:35 +0000870static PyObject*
871string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000873 int c;
874 int len_a, len_b;
875 int min_len;
876 PyObject *result;
877
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000878 /* Make sure both arguments are strings. */
879 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000880 result = Py_NotImplemented;
881 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000882 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000883 if (a == b) {
884 switch (op) {
885 case Py_EQ:case Py_LE:case Py_GE:
886 result = Py_True;
887 goto out;
888 case Py_NE:case Py_LT:case Py_GT:
889 result = Py_False;
890 goto out;
891 }
892 }
893 if (op == Py_EQ) {
894 /* Supporting Py_NE here as well does not save
895 much time, since Py_NE is rarely used. */
896 if (a->ob_size == b->ob_size
897 && (a->ob_sval[0] == b->ob_sval[0]
898 && memcmp(a->ob_sval, b->ob_sval,
899 a->ob_size) == 0)) {
900 result = Py_True;
901 } else {
902 result = Py_False;
903 }
904 goto out;
905 }
906 len_a = a->ob_size; len_b = b->ob_size;
907 min_len = (len_a < len_b) ? len_a : len_b;
908 if (min_len > 0) {
909 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
910 if (c==0)
911 c = memcmp(a->ob_sval, b->ob_sval, min_len);
912 }else
913 c = 0;
914 if (c == 0)
915 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
916 switch (op) {
917 case Py_LT: c = c < 0; break;
918 case Py_LE: c = c <= 0; break;
919 case Py_EQ: assert(0); break; /* unreachable */
920 case Py_NE: c = c != 0; break;
921 case Py_GT: c = c > 0; break;
922 case Py_GE: c = c >= 0; break;
923 default:
924 result = Py_NotImplemented;
925 goto out;
926 }
927 result = c ? Py_True : Py_False;
928 out:
929 Py_INCREF(result);
930 return result;
931}
932
933int
934_PyString_Eq(PyObject *o1, PyObject *o2)
935{
936 PyStringObject *a, *b;
937 a = (PyStringObject*)o1;
938 b = (PyStringObject*)o2;
939 return a->ob_size == b->ob_size
940 && *a->ob_sval == *b->ob_sval
941 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942}
943
Guido van Rossum9bfef441993-03-29 10:43:31 +0000944static long
Fred Drakeba096332000-07-09 07:04:36 +0000945string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000946{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000947 register int len;
948 register unsigned char *p;
949 register long x;
950
951#ifdef CACHE_HASH
952 if (a->ob_shash != -1)
953 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000954#ifdef INTERN_STRINGS
955 if (a->ob_sinterned != NULL)
956 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000958#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959#endif
960 len = a->ob_size;
961 p = (unsigned char *) a->ob_sval;
962 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000963 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000964 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000965 x ^= a->ob_size;
966 if (x == -1)
967 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000968#ifdef CACHE_HASH
969 a->ob_shash = x;
970#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000971 return x;
972}
973
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000974static int
Fred Drakeba096332000-07-09 07:04:36 +0000975string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000976{
977 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000978 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000979 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000980 return -1;
981 }
982 *ptr = (void *)self->ob_sval;
983 return self->ob_size;
984}
985
986static int
Fred Drakeba096332000-07-09 07:04:36 +0000987string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000988{
Guido van Rossum045e6881997-09-08 18:30:11 +0000989 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000990 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000991 return -1;
992}
993
994static int
Fred Drakeba096332000-07-09 07:04:36 +0000995string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000996{
997 if ( lenp )
998 *lenp = self->ob_size;
999 return 1;
1000}
1001
Guido van Rossum1db70701998-10-08 02:18:52 +00001002static int
Fred Drakeba096332000-07-09 07:04:36 +00001003string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001004{
1005 if ( index != 0 ) {
1006 PyErr_SetString(PyExc_SystemError,
1007 "accessing non-existent string segment");
1008 return -1;
1009 }
1010 *ptr = self->ob_sval;
1011 return self->ob_size;
1012}
1013
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001014static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001015 (inquiry)string_length, /*sq_length*/
1016 (binaryfunc)string_concat, /*sq_concat*/
1017 (intargfunc)string_repeat, /*sq_repeat*/
1018 (intargfunc)string_item, /*sq_item*/
1019 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001020 0, /*sq_ass_item*/
1021 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001022 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001023};
1024
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001025static PyBufferProcs string_as_buffer = {
1026 (getreadbufferproc)string_buffer_getreadbuf,
1027 (getwritebufferproc)string_buffer_getwritebuf,
1028 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001029 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001030};
1031
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032
1033
/* Values for the striptype argument of do_strip(). */
#define LEFTSTRIP	0	/* strip leading whitespace only */
#define RIGHTSTRIP	1	/* strip trailing whitespace only */
#define BOTHSTRIP	2	/* strip both ends */
1037
1038
1039static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001040split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001042 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001043 PyObject* item;
1044 PyObject *list = PyList_New(0);
1045
1046 if (list == NULL)
1047 return NULL;
1048
Guido van Rossum4c08d552000-03-10 22:55:18 +00001049 for (i = j = 0; i < len; ) {
1050 while (i < len && isspace(Py_CHARMASK(s[i])))
1051 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 while (i < len && !isspace(Py_CHARMASK(s[i])))
1054 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001056 if (maxsplit-- <= 0)
1057 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001058 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1059 if (item == NULL)
1060 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001061 err = PyList_Append(list, item);
1062 Py_DECREF(item);
1063 if (err < 0)
1064 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001065 while (i < len && isspace(Py_CHARMASK(s[i])))
1066 i++;
1067 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001068 }
1069 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001070 if (j < len) {
1071 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1072 if (item == NULL)
1073 goto finally;
1074 err = PyList_Append(list, item);
1075 Py_DECREF(item);
1076 if (err < 0)
1077 goto finally;
1078 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001079 return list;
1080 finally:
1081 Py_DECREF(list);
1082 return NULL;
1083}
1084
1085
/* Docstring for S.split(). */
static char split__doc__[] =
	"S.split([sep [,maxsplit]]) -> list of strings\n"
	"\n"
	"Return a list of the words in the string S, using sep as the\n"
	"delimiter string. If maxsplit is given, at most maxsplit\n"
	"splits are done. If sep is not specified, any whitespace string\n"
	"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093
1094static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001095string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001096{
1097 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001098 int maxsplit = -1;
1099 const char *s = PyString_AS_STRING(self), *sub;
1100 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001101
Guido van Rossum4c08d552000-03-10 22:55:18 +00001102 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001103 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001104 if (maxsplit < 0)
1105 maxsplit = INT_MAX;
1106 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001107 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001108 if (PyString_Check(subobj)) {
1109 sub = PyString_AS_STRING(subobj);
1110 n = PyString_GET_SIZE(subobj);
1111 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001112#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001113 else if (PyUnicode_Check(subobj))
1114 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001115#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001116 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1117 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001118 if (n == 0) {
1119 PyErr_SetString(PyExc_ValueError, "empty separator");
1120 return NULL;
1121 }
1122
1123 list = PyList_New(0);
1124 if (list == NULL)
1125 return NULL;
1126
1127 i = j = 0;
1128 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001129 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001130 if (maxsplit-- <= 0)
1131 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1133 if (item == NULL)
1134 goto fail;
1135 err = PyList_Append(list, item);
1136 Py_DECREF(item);
1137 if (err < 0)
1138 goto fail;
1139 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140 }
1141 else
1142 i++;
1143 }
1144 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1145 if (item == NULL)
1146 goto fail;
1147 err = PyList_Append(list, item);
1148 Py_DECREF(item);
1149 if (err < 0)
1150 goto fail;
1151
1152 return list;
1153
1154 fail:
1155 Py_DECREF(list);
1156 return NULL;
1157}
1158
1159
/* Docstring for S.join(). */
static char join__doc__[] =
	"S.join(sequence) -> string\n"
	"\n"
	"Return a string which is the concatenation of the strings in the\n"
	"sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165
1166static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001167string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001168{
1169 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001170 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001171 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001172 char *p;
1173 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001174 size_t sz = 0;
1175 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001176 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001177
Tim Peters19fe14e2001-01-19 03:03:47 +00001178 seq = PySequence_Fast(orig, "");
1179 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001180 if (PyErr_ExceptionMatches(PyExc_TypeError))
1181 PyErr_Format(PyExc_TypeError,
1182 "sequence expected, %.80s found",
1183 orig->ob_type->tp_name);
1184 return NULL;
1185 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001186
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001187 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001188 if (seqlen == 0) {
1189 Py_DECREF(seq);
1190 return PyString_FromString("");
1191 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001192 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001193 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001194 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1195 PyErr_Format(PyExc_TypeError,
1196 "sequence item 0: expected string,"
1197 " %.80s found",
1198 item->ob_type->tp_name);
1199 Py_DECREF(seq);
1200 return NULL;
1201 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001202 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001203 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001204 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001206
Tim Peters19fe14e2001-01-19 03:03:47 +00001207 /* There are at least two things to join. Do a pre-pass to figure out
1208 * the total amount of space we'll need (sz), see whether any argument
1209 * is absurd, and defer to the Unicode join if appropriate.
1210 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001211 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001212 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001213 item = PySequence_Fast_GET_ITEM(seq, i);
1214 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001215#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001216 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001217 /* Defer to Unicode join.
1218 * CAUTION: There's no gurantee that the
1219 * original sequence can be iterated over
1220 * again, so we must pass seq here.
1221 */
1222 PyObject *result;
1223 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001224 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001225 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001226 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001227#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001228 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001229 "sequence item %i: expected string,"
1230 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001231 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001232 Py_DECREF(seq);
1233 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001234 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001235 sz += PyString_GET_SIZE(item);
1236 if (i != 0)
1237 sz += seplen;
1238 if (sz < old_sz || sz > INT_MAX) {
1239 PyErr_SetString(PyExc_OverflowError,
1240 "join() is too long for a Python string");
1241 Py_DECREF(seq);
1242 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001244 }
1245
1246 /* Allocate result space. */
1247 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1248 if (res == NULL) {
1249 Py_DECREF(seq);
1250 return NULL;
1251 }
1252
1253 /* Catenate everything. */
1254 p = PyString_AS_STRING(res);
1255 for (i = 0; i < seqlen; ++i) {
1256 size_t n;
1257 item = PySequence_Fast_GET_ITEM(seq, i);
1258 n = PyString_GET_SIZE(item);
1259 memcpy(p, PyString_AS_STRING(item), n);
1260 p += n;
1261 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001262 memcpy(p, sep, seplen);
1263 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001264 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001266
Jeremy Hylton49048292000-07-11 03:28:17 +00001267 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001268 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269}
1270
Tim Peters52e155e2001-06-16 05:42:57 +00001271PyObject *
1272_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001273{
Tim Petersa7259592001-06-16 05:11:17 +00001274 assert(sep != NULL && PyString_Check(sep));
1275 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001276 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001277}
1278
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001279static long
Fred Drakeba096332000-07-09 07:04:36 +00001280string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001282 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001283 int len = PyString_GET_SIZE(self);
1284 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001285 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001287 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001288 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289 return -2;
1290 if (PyString_Check(subobj)) {
1291 sub = PyString_AS_STRING(subobj);
1292 n = PyString_GET_SIZE(subobj);
1293 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001294#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001295 else if (PyUnicode_Check(subobj))
1296 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001297#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 return -2;
1300
1301 if (last > len)
1302 last = len;
1303 if (last < 0)
1304 last += len;
1305 if (last < 0)
1306 last = 0;
1307 if (i < 0)
1308 i += len;
1309 if (i < 0)
1310 i = 0;
1311
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312 if (dir > 0) {
1313 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 last -= n;
1316 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001317 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001318 return (long)i;
1319 }
1320 else {
1321 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001322
Guido van Rossum4c08d552000-03-10 22:55:18 +00001323 if (n == 0 && i <= last)
1324 return (long)last;
1325 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001326 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001327 return (long)j;
1328 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001329
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return -1;
1331}
1332
1333
/* Docstring for S.find().  The range is spelled S[start:end] (slice
 * notation, capital S) to agree with the signature line and with the
 * wording used for S.count().
 */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1342
1343static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001344string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001346 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347 if (result == -2)
1348 return NULL;
1349 return PyInt_FromLong(result);
1350}
1351
1352
/* Docstring for S.index(). */
static char index__doc__[] =
	"S.index(sub [,start [,end]]) -> int\n"
	"\n"
	"Like S.find() but raise ValueError when the substring is not found.";
1357
1358static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001359string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362 if (result == -2)
1363 return NULL;
1364 if (result == -1) {
1365 PyErr_SetString(PyExc_ValueError,
1366 "substring not found in string.index");
1367 return NULL;
1368 }
1369 return PyInt_FromLong(result);
1370}
1371
1372
/* Docstring for S.rfind().  The range is spelled S[start:end] (slice
 * notation, capital S) to agree with the signature line and with the
 * wording used for S.count().
 */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1381
1382static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001383string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001384{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001385 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001386 if (result == -2)
1387 return NULL;
1388 return PyInt_FromLong(result);
1389}
1390
1391
/* Docstring for S.rindex(). */
static char rindex__doc__[] =
	"S.rindex(sub [,start [,end]]) -> int\n"
	"\n"
	"Like S.rfind() but raise ValueError when the substring is not found.";
1396
1397static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001398string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 if (result == -2)
1402 return NULL;
1403 if (result == -1) {
1404 PyErr_SetString(PyExc_ValueError,
1405 "substring not found in string.rindex");
1406 return NULL;
1407 }
1408 return PyInt_FromLong(result);
1409}
1410
1411
1412static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001413do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414{
1415 char *s = PyString_AS_STRING(self);
1416 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418 i = 0;
1419 if (striptype != RIGHTSTRIP) {
1420 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1421 i++;
1422 }
1423 }
1424
1425 j = len;
1426 if (striptype != LEFTSTRIP) {
1427 do {
1428 j--;
1429 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1430 j++;
1431 }
1432
Tim Peters8fa5dd02001-09-12 02:18:30 +00001433 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434 Py_INCREF(self);
1435 return (PyObject*)self;
1436 }
1437 else
1438 return PyString_FromStringAndSize(s+i, j-i);
1439}
1440
1441
/* Docstring for S.strip(). */
static char strip__doc__[] =
	"S.strip() -> string\n"
	"\n"
	"Return a copy of the string S with leading and trailing\n"
	"whitespace removed.";
1447
1448static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001449string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001451 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452}
1453
1454
/* Docstring for S.lstrip(). */
static char lstrip__doc__[] =
	"S.lstrip() -> string\n"
	"\n"
	"Return a copy of the string S with leading whitespace removed.";
1459
1460static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001461string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001463 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001464}
1465
1466
/* Docstring for S.rstrip(). */
static char rstrip__doc__[] =
	"S.rstrip() -> string\n"
	"\n"
	"Return a copy of the string S with trailing whitespace removed.";
1471
1472static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001473string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001475 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476}
1477
1478
/* Docstring for S.lower(). */
static char lower__doc__[] =
	"S.lower() -> string\n"
	"\n"
	"Return a copy of the string S converted to lowercase.";
1483
1484static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001485string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486{
1487 char *s = PyString_AS_STRING(self), *s_new;
1488 int i, n = PyString_GET_SIZE(self);
1489 PyObject *new;
1490
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 new = PyString_FromStringAndSize(NULL, n);
1492 if (new == NULL)
1493 return NULL;
1494 s_new = PyString_AsString(new);
1495 for (i = 0; i < n; i++) {
1496 int c = Py_CHARMASK(*s++);
1497 if (isupper(c)) {
1498 *s_new = tolower(c);
1499 } else
1500 *s_new = c;
1501 s_new++;
1502 }
1503 return new;
1504}
1505
1506
/* Docstring for S.upper(). */
static char upper__doc__[] =
	"S.upper() -> string\n"
	"\n"
	"Return a copy of the string S converted to uppercase.";
1511
1512static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001513string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514{
1515 char *s = PyString_AS_STRING(self), *s_new;
1516 int i, n = PyString_GET_SIZE(self);
1517 PyObject *new;
1518
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 new = PyString_FromStringAndSize(NULL, n);
1520 if (new == NULL)
1521 return NULL;
1522 s_new = PyString_AsString(new);
1523 for (i = 0; i < n; i++) {
1524 int c = Py_CHARMASK(*s++);
1525 if (islower(c)) {
1526 *s_new = toupper(c);
1527 } else
1528 *s_new = c;
1529 s_new++;
1530 }
1531 return new;
1532}
1533
1534
/* Docstring for S.title(). */
static char title__doc__[] =
	"S.title() -> string\n"
	"\n"
	"Return a titlecased version of S, i.e. words start with uppercase\n"
	"characters, all remaining cased characters have lowercase.";
1540
1541static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001542string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543{
1544 char *s = PyString_AS_STRING(self), *s_new;
1545 int i, n = PyString_GET_SIZE(self);
1546 int previous_is_cased = 0;
1547 PyObject *new;
1548
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 new = PyString_FromStringAndSize(NULL, n);
1550 if (new == NULL)
1551 return NULL;
1552 s_new = PyString_AsString(new);
1553 for (i = 0; i < n; i++) {
1554 int c = Py_CHARMASK(*s++);
1555 if (islower(c)) {
1556 if (!previous_is_cased)
1557 c = toupper(c);
1558 previous_is_cased = 1;
1559 } else if (isupper(c)) {
1560 if (previous_is_cased)
1561 c = tolower(c);
1562 previous_is_cased = 1;
1563 } else
1564 previous_is_cased = 0;
1565 *s_new++ = c;
1566 }
1567 return new;
1568}
1569
/* Docstring for S.capitalize(). */
static char capitalize__doc__[] =
	"S.capitalize() -> string\n"
	"\n"
	"Return a copy of the string S with only its first character\n"
	"capitalized.";
1575
1576static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001577string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001578{
1579 char *s = PyString_AS_STRING(self), *s_new;
1580 int i, n = PyString_GET_SIZE(self);
1581 PyObject *new;
1582
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583 new = PyString_FromStringAndSize(NULL, n);
1584 if (new == NULL)
1585 return NULL;
1586 s_new = PyString_AsString(new);
1587 if (0 < n) {
1588 int c = Py_CHARMASK(*s++);
1589 if (islower(c))
1590 *s_new = toupper(c);
1591 else
1592 *s_new = c;
1593 s_new++;
1594 }
1595 for (i = 1; i < n; i++) {
1596 int c = Py_CHARMASK(*s++);
1597 if (isupper(c))
1598 *s_new = tolower(c);
1599 else
1600 *s_new = c;
1601 s_new++;
1602 }
1603 return new;
1604}
1605
1606
1607static char count__doc__[] =
1608"S.count(sub[, start[, end]]) -> int\n\
1609\n\
1610Return the number of occurrences of substring sub in string\n\
1611S[start:end]. Optional arguments start and end are\n\
1612interpreted as in slice notation.";
1613
1614static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001615string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001616{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001617 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618 int len = PyString_GET_SIZE(self), n;
1619 int i = 0, last = INT_MAX;
1620 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622
Guido van Rossumc6821402000-05-08 14:08:05 +00001623 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1624 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001626
Guido van Rossum4c08d552000-03-10 22:55:18 +00001627 if (PyString_Check(subobj)) {
1628 sub = PyString_AS_STRING(subobj);
1629 n = PyString_GET_SIZE(subobj);
1630 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001631#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001632 else if (PyUnicode_Check(subobj)) {
1633 int count;
1634 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1635 if (count == -1)
1636 return NULL;
1637 else
1638 return PyInt_FromLong((long) count);
1639 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001640#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001641 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1642 return NULL;
1643
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001644 if (last > len)
1645 last = len;
1646 if (last < 0)
1647 last += len;
1648 if (last < 0)
1649 last = 0;
1650 if (i < 0)
1651 i += len;
1652 if (i < 0)
1653 i = 0;
1654 m = last + 1 - n;
1655 if (n == 0)
1656 return PyInt_FromLong((long) (m-i));
1657
1658 r = 0;
1659 while (i < m) {
1660 if (!memcmp(s+i, sub, n)) {
1661 r++;
1662 i += n;
1663 } else {
1664 i++;
1665 }
1666 }
1667 return PyInt_FromLong((long) r);
1668}
1669
1670
1671static char swapcase__doc__[] =
1672"S.swapcase() -> string\n\
1673\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675converted to lowercase and vice versa.";
1676
1677static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001678string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001679{
1680 char *s = PyString_AS_STRING(self), *s_new;
1681 int i, n = PyString_GET_SIZE(self);
1682 PyObject *new;
1683
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 new = PyString_FromStringAndSize(NULL, n);
1685 if (new == NULL)
1686 return NULL;
1687 s_new = PyString_AsString(new);
1688 for (i = 0; i < n; i++) {
1689 int c = Py_CHARMASK(*s++);
1690 if (islower(c)) {
1691 *s_new = toupper(c);
1692 }
1693 else if (isupper(c)) {
1694 *s_new = tolower(c);
1695 }
1696 else
1697 *s_new = c;
1698 s_new++;
1699 }
1700 return new;
1701}
1702
1703
1704static char translate__doc__[] =
1705"S.translate(table [,deletechars]) -> string\n\
1706\n\
1707Return a copy of the string S, where all characters occurring\n\
1708in the optional argument deletechars are removed, and the\n\
1709remaining characters have been mapped through the given\n\
1710translation table, which must be a string of length 256.";
1711
1712static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001713string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001715 register char *input, *output;
1716 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717 register int i, c, changed = 0;
1718 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001719 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 int inlen, tablen, dellen = 0;
1721 PyObject *result;
1722 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001723 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724
Guido van Rossum4c08d552000-03-10 22:55:18 +00001725 if (!PyArg_ParseTuple(args, "O|O:translate",
1726 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001728
1729 if (PyString_Check(tableobj)) {
1730 table1 = PyString_AS_STRING(tableobj);
1731 tablen = PyString_GET_SIZE(tableobj);
1732 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001733#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001735 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001736 parameter; instead a mapping to None will cause characters
1737 to be deleted. */
1738 if (delobj != NULL) {
1739 PyErr_SetString(PyExc_TypeError,
1740 "deletions are implemented differently for unicode");
1741 return NULL;
1742 }
1743 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1744 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001745#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001746 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748
1749 if (delobj != NULL) {
1750 if (PyString_Check(delobj)) {
1751 del_table = PyString_AS_STRING(delobj);
1752 dellen = PyString_GET_SIZE(delobj);
1753 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001754#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 else if (PyUnicode_Check(delobj)) {
1756 PyErr_SetString(PyExc_TypeError,
1757 "deletions are implemented differently for unicode");
1758 return NULL;
1759 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001760#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001761 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1762 return NULL;
1763
1764 if (tablen != 256) {
1765 PyErr_SetString(PyExc_ValueError,
1766 "translation table must be 256 characters long");
1767 return NULL;
1768 }
1769 }
1770 else {
1771 del_table = NULL;
1772 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773 }
1774
1775 table = table1;
1776 inlen = PyString_Size(input_obj);
1777 result = PyString_FromStringAndSize((char *)NULL, inlen);
1778 if (result == NULL)
1779 return NULL;
1780 output_start = output = PyString_AsString(result);
1781 input = PyString_AsString(input_obj);
1782
1783 if (dellen == 0) {
1784 /* If no deletions are required, use faster code */
1785 for (i = inlen; --i >= 0; ) {
1786 c = Py_CHARMASK(*input++);
1787 if (Py_CHARMASK((*output++ = table[c])) != c)
1788 changed = 1;
1789 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001790 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791 return result;
1792 Py_DECREF(result);
1793 Py_INCREF(input_obj);
1794 return input_obj;
1795 }
1796
1797 for (i = 0; i < 256; i++)
1798 trans_table[i] = Py_CHARMASK(table[i]);
1799
1800 for (i = 0; i < dellen; i++)
1801 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1802
1803 for (i = inlen; --i >= 0; ) {
1804 c = Py_CHARMASK(*input++);
1805 if (trans_table[c] != -1)
1806 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1807 continue;
1808 changed = 1;
1809 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001810 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811 Py_DECREF(result);
1812 Py_INCREF(input_obj);
1813 return input_obj;
1814 }
1815 /* Fix the size of the resulting string */
1816 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1817 return NULL;
1818 return result;
1819}
1820
1821
1822/* What follows is used for implementing replace(). Perry Stoll. */
1823
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory (the data may
  contain NUL bytes).

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside
  the LEN bytes at MEM.  Returns the index into MEM of that occurrence,
  or -1 if there is none.  If PAT_LEN is greater than LEN the function
  returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* the pattern cannot start within the last pat_len-1 bytes */
    const int last_start = len - pat_len;
    register int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1849
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right and resuming after each match:
      mem=1111  and pat==11 returns 2.
      mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        register int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* drop everything up to and including this match */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
1873
/*
  mymemreplace

  Return a string in which occurrences of PAT in memory STR are
  replaced with SUB.

  str, len      input bytes
  pat, pat_len  pattern to search for; string_replace() rejects an
                empty pattern before calling this function
  sub, sub_len  replacement bytes
  count         maximum number of replacements; a negative value means
                "replace every occurrence"
  out_len       receives the result length (see below)

  If STR is empty, if the length of PAT is greater than the length of
  STR, or if there are no occurrences of PAT to replace, the original
  string is returned.  Otherwise a new buffer is allocated here with
  PyMem_MALLOC and returned; the caller must release it with
  PyMem_FREE.

  on return, out_len is:
      the length of the output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs.

  return value is:
      the new string allocated locally, or
      STR itself when out_len is -1, or
      NULL if an error occurred (no memory, or the result length
      would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* A growing replacement can push the result length past INT_MAX.
       Refuse instead of computing a wrapped length and then writing
       beyond an undersized buffer.  (A shrinking replacement cannot
       overflow: the nfound matches are disjoint and lie inside STR.) */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1958
1959
1960static char replace__doc__[] =
1961"S.replace (old, new[, maxsplit]) -> string\n\
1962\n\
1963Return a copy of string S with all occurrences of substring\n\
1964old replaced by new. If the optional argument maxsplit is\n\
1965given, only the first maxsplit occurrences are replaced.";
1966
1967static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001968string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001970 const char *str = PyString_AS_STRING(self), *sub, *repl;
1971 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001972 const int len = PyString_GET_SIZE(self);
1973 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001974 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001976 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977
Guido van Rossum4c08d552000-03-10 22:55:18 +00001978 if (!PyArg_ParseTuple(args, "OO|i:replace",
1979 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001981
1982 if (PyString_Check(subobj)) {
1983 sub = PyString_AS_STRING(subobj);
1984 sub_len = PyString_GET_SIZE(subobj);
1985 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001986#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001987 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001988 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001989 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001990#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001991 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1992 return NULL;
1993
1994 if (PyString_Check(replobj)) {
1995 repl = PyString_AS_STRING(replobj);
1996 repl_len = PyString_GET_SIZE(replobj);
1997 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001998#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001999 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002000 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002001 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002002#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002003 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2004 return NULL;
2005
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002006 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002007 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 return NULL;
2009 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002010 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011 if (new_s == NULL) {
2012 PyErr_NoMemory();
2013 return NULL;
2014 }
2015 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002016 if (PyString_CheckExact(self)) {
2017 /* we're returning another reference to self */
2018 new = (PyObject*)self;
2019 Py_INCREF(new);
2020 }
2021 else {
2022 new = PyString_FromStringAndSize(str, len);
2023 if (new == NULL)
2024 return NULL;
2025 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026 }
2027 else {
2028 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002029 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030 }
2031 return new;
2032}
2033
2034
2035static char startswith__doc__[] =
2036"S.startswith(prefix[, start[, end]]) -> int\n\
2037\n\
2038Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2039optional start, test S beginning at that position. With optional end, stop\n\
2040comparing S at that position.";
2041
2042static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002043string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002045 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002047 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 int plen;
2049 int start = 0;
2050 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002051 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052
Guido van Rossumc6821402000-05-08 14:08:05 +00002053 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2054 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002055 return NULL;
2056 if (PyString_Check(subobj)) {
2057 prefix = PyString_AS_STRING(subobj);
2058 plen = PyString_GET_SIZE(subobj);
2059 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002060#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002061 else if (PyUnicode_Check(subobj)) {
2062 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002063 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002064 subobj, start, end, -1);
2065 if (rc == -1)
2066 return NULL;
2067 else
2068 return PyInt_FromLong((long) rc);
2069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002070#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072 return NULL;
2073
2074 /* adopt Java semantics for index out of range. it is legal for
2075 * offset to be == plen, but this only returns true if prefix is
2076 * the empty string.
2077 */
2078 if (start < 0 || start+plen > len)
2079 return PyInt_FromLong(0);
2080
2081 if (!memcmp(str+start, prefix, plen)) {
2082 /* did the match end after the specified end? */
2083 if (end < 0)
2084 return PyInt_FromLong(1);
2085 else if (end - start < plen)
2086 return PyInt_FromLong(0);
2087 else
2088 return PyInt_FromLong(1);
2089 }
2090 else return PyInt_FromLong(0);
2091}
2092
2093
2094static char endswith__doc__[] =
2095"S.endswith(suffix[, start[, end]]) -> int\n\
2096\n\
2097Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2098optional start, test S beginning at that position. With optional end, stop\n\
2099comparing S at that position.";
2100
2101static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002102string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106 const char* suffix;
2107 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 int start = 0;
2109 int end = -1;
2110 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112
Guido van Rossumc6821402000-05-08 14:08:05 +00002113 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2114 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002115 return NULL;
2116 if (PyString_Check(subobj)) {
2117 suffix = PyString_AS_STRING(subobj);
2118 slen = PyString_GET_SIZE(subobj);
2119 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002120#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002121 else if (PyUnicode_Check(subobj)) {
2122 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002123 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002124 subobj, start, end, +1);
2125 if (rc == -1)
2126 return NULL;
2127 else
2128 return PyInt_FromLong((long) rc);
2129 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002130#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002131 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 return NULL;
2133
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 return PyInt_FromLong(0);
2136
2137 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139
Guido van Rossum4c08d552000-03-10 22:55:18 +00002140 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141 return PyInt_FromLong(1);
2142 else return PyInt_FromLong(0);
2143}
2144
2145
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002146static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002147"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002148\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002149Encodes S using the codec registered for encoding. encoding defaults\n\
2150to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002151handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2152a ValueError. Other possible values are 'ignore' and 'replace'.";
2153
2154static PyObject *
2155string_encode(PyStringObject *self, PyObject *args)
2156{
2157 char *encoding = NULL;
2158 char *errors = NULL;
2159 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2160 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002161 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2162}
2163
2164
2165static char decode__doc__[] =
2166"S.decode([encoding[,errors]]) -> object\n\
2167\n\
2168Decodes S using the codec registered for encoding. encoding defaults\n\
2169to the default encoding. errors may be given to set a different error\n\
2170handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2171a ValueError. Other possible values are 'ignore' and 'replace'.";
2172
2173static PyObject *
2174string_decode(PyStringObject *self, PyObject *args)
2175{
2176 char *encoding = NULL;
2177 char *errors = NULL;
2178 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2179 return NULL;
2180 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002181}
2182
2183
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184static char expandtabs__doc__[] =
2185"S.expandtabs([tabsize]) -> string\n\
2186\n\
2187Return a copy of S where all tab characters are expanded using spaces.\n\
2188If tabsize is not given, a tab size of 8 characters is assumed.";
2189
2190static PyObject*
2191string_expandtabs(PyStringObject *self, PyObject *args)
2192{
2193 const char *e, *p;
2194 char *q;
2195 int i, j;
2196 PyObject *u;
2197 int tabsize = 8;
2198
2199 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2200 return NULL;
2201
Thomas Wouters7e474022000-07-16 12:04:32 +00002202 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 i = j = 0;
2204 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2205 for (p = PyString_AS_STRING(self); p < e; p++)
2206 if (*p == '\t') {
2207 if (tabsize > 0)
2208 j += tabsize - (j % tabsize);
2209 }
2210 else {
2211 j++;
2212 if (*p == '\n' || *p == '\r') {
2213 i += j;
2214 j = 0;
2215 }
2216 }
2217
2218 /* Second pass: create output string and fill it */
2219 u = PyString_FromStringAndSize(NULL, i + j);
2220 if (!u)
2221 return NULL;
2222
2223 j = 0;
2224 q = PyString_AS_STRING(u);
2225
2226 for (p = PyString_AS_STRING(self); p < e; p++)
2227 if (*p == '\t') {
2228 if (tabsize > 0) {
2229 i = tabsize - (j % tabsize);
2230 j += i;
2231 while (i--)
2232 *q++ = ' ';
2233 }
2234 }
2235 else {
2236 j++;
2237 *q++ = *p;
2238 if (*p == '\n' || *p == '\r')
2239 j = 0;
2240 }
2241
2242 return u;
2243}
2244
Tim Peters8fa5dd02001-09-12 02:18:30 +00002245static PyObject *
2246pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247{
2248 PyObject *u;
2249
2250 if (left < 0)
2251 left = 0;
2252 if (right < 0)
2253 right = 0;
2254
Tim Peters8fa5dd02001-09-12 02:18:30 +00002255 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256 Py_INCREF(self);
2257 return (PyObject *)self;
2258 }
2259
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002260 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261 left + PyString_GET_SIZE(self) + right);
2262 if (u) {
2263 if (left)
2264 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002265 memcpy(PyString_AS_STRING(u) + left,
2266 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267 PyString_GET_SIZE(self));
2268 if (right)
2269 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2270 fill, right);
2271 }
2272
2273 return u;
2274}
2275
2276static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002277"S.ljust(width) -> string\n"
2278"\n"
2279"Return S left justified in a string of length width. Padding is\n"
2280"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281
2282static PyObject *
2283string_ljust(PyStringObject *self, PyObject *args)
2284{
2285 int width;
2286 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2287 return NULL;
2288
Tim Peters8fa5dd02001-09-12 02:18:30 +00002289 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002290 Py_INCREF(self);
2291 return (PyObject*) self;
2292 }
2293
2294 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2295}
2296
2297
2298static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002299"S.rjust(width) -> string\n"
2300"\n"
2301"Return S right justified in a string of length width. Padding is\n"
2302"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303
2304static PyObject *
2305string_rjust(PyStringObject *self, PyObject *args)
2306{
2307 int width;
2308 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2309 return NULL;
2310
Tim Peters8fa5dd02001-09-12 02:18:30 +00002311 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312 Py_INCREF(self);
2313 return (PyObject*) self;
2314 }
2315
2316 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2317}
2318
2319
2320static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002321"S.center(width) -> string\n"
2322"\n"
2323"Return S centered in a string of length width. Padding is done\n"
2324"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002325
2326static PyObject *
2327string_center(PyStringObject *self, PyObject *args)
2328{
2329 int marg, left;
2330 int width;
2331
2332 if (!PyArg_ParseTuple(args, "i:center", &width))
2333 return NULL;
2334
Tim Peters8fa5dd02001-09-12 02:18:30 +00002335 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 Py_INCREF(self);
2337 return (PyObject*) self;
2338 }
2339
2340 marg = width - PyString_GET_SIZE(self);
2341 left = marg / 2 + (marg & width & 1);
2342
2343 return pad(self, left, marg - left, ' ');
2344}
2345
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002347"S.isspace() -> int\n"
2348"\n"
2349"Return 1 if there are only whitespace characters in S,\n"
2350"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351
2352static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002353string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002354{
Fred Drakeba096332000-07-09 07:04:36 +00002355 register const unsigned char *p
2356 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002357 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 /* Shortcut for single character strings */
2360 if (PyString_GET_SIZE(self) == 1 &&
2361 isspace(*p))
2362 return PyInt_FromLong(1);
2363
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002364 /* Special case for empty strings */
2365 if (PyString_GET_SIZE(self) == 0)
2366 return PyInt_FromLong(0);
2367
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 e = p + PyString_GET_SIZE(self);
2369 for (; p < e; p++) {
2370 if (!isspace(*p))
2371 return PyInt_FromLong(0);
2372 }
2373 return PyInt_FromLong(1);
2374}
2375
2376
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002377static char isalpha__doc__[] =
2378"S.isalpha() -> int\n\
2379\n\
2380Return 1 if all characters in S are alphabetic\n\
2381and there is at least one character in S, 0 otherwise.";
2382
2383static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002384string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002385{
Fred Drakeba096332000-07-09 07:04:36 +00002386 register const unsigned char *p
2387 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002388 register const unsigned char *e;
2389
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002390 /* Shortcut for single character strings */
2391 if (PyString_GET_SIZE(self) == 1 &&
2392 isalpha(*p))
2393 return PyInt_FromLong(1);
2394
2395 /* Special case for empty strings */
2396 if (PyString_GET_SIZE(self) == 0)
2397 return PyInt_FromLong(0);
2398
2399 e = p + PyString_GET_SIZE(self);
2400 for (; p < e; p++) {
2401 if (!isalpha(*p))
2402 return PyInt_FromLong(0);
2403 }
2404 return PyInt_FromLong(1);
2405}
2406
2407
2408static char isalnum__doc__[] =
2409"S.isalnum() -> int\n\
2410\n\
2411Return 1 if all characters in S are alphanumeric\n\
2412and there is at least one character in S, 0 otherwise.";
2413
2414static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002415string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002416{
Fred Drakeba096332000-07-09 07:04:36 +00002417 register const unsigned char *p
2418 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002419 register const unsigned char *e;
2420
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002421 /* Shortcut for single character strings */
2422 if (PyString_GET_SIZE(self) == 1 &&
2423 isalnum(*p))
2424 return PyInt_FromLong(1);
2425
2426 /* Special case for empty strings */
2427 if (PyString_GET_SIZE(self) == 0)
2428 return PyInt_FromLong(0);
2429
2430 e = p + PyString_GET_SIZE(self);
2431 for (; p < e; p++) {
2432 if (!isalnum(*p))
2433 return PyInt_FromLong(0);
2434 }
2435 return PyInt_FromLong(1);
2436}
2437
2438
Guido van Rossum4c08d552000-03-10 22:55:18 +00002439static char isdigit__doc__[] =
2440"S.isdigit() -> int\n\
2441\n\
2442Return 1 if there are only digit characters in S,\n\
24430 otherwise.";
2444
2445static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002446string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447{
Fred Drakeba096332000-07-09 07:04:36 +00002448 register const unsigned char *p
2449 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002450 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002451
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452 /* Shortcut for single character strings */
2453 if (PyString_GET_SIZE(self) == 1 &&
2454 isdigit(*p))
2455 return PyInt_FromLong(1);
2456
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002457 /* Special case for empty strings */
2458 if (PyString_GET_SIZE(self) == 0)
2459 return PyInt_FromLong(0);
2460
Guido van Rossum4c08d552000-03-10 22:55:18 +00002461 e = p + PyString_GET_SIZE(self);
2462 for (; p < e; p++) {
2463 if (!isdigit(*p))
2464 return PyInt_FromLong(0);
2465 }
2466 return PyInt_FromLong(1);
2467}
2468
2469
2470static char islower__doc__[] =
2471"S.islower() -> int\n\
2472\n\
2473Return 1 if all cased characters in S are lowercase and there is\n\
2474at least one cased character in S, 0 otherwise.";
2475
2476static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002477string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002478{
Fred Drakeba096332000-07-09 07:04:36 +00002479 register const unsigned char *p
2480 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002481 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002482 int cased;
2483
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484 /* Shortcut for single character strings */
2485 if (PyString_GET_SIZE(self) == 1)
2486 return PyInt_FromLong(islower(*p) != 0);
2487
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002488 /* Special case for empty strings */
2489 if (PyString_GET_SIZE(self) == 0)
2490 return PyInt_FromLong(0);
2491
Guido van Rossum4c08d552000-03-10 22:55:18 +00002492 e = p + PyString_GET_SIZE(self);
2493 cased = 0;
2494 for (; p < e; p++) {
2495 if (isupper(*p))
2496 return PyInt_FromLong(0);
2497 else if (!cased && islower(*p))
2498 cased = 1;
2499 }
2500 return PyInt_FromLong(cased);
2501}
2502
2503
2504static char isupper__doc__[] =
2505"S.isupper() -> int\n\
2506\n\
2507Return 1 if all cased characters in S are uppercase and there is\n\
2508at least one cased character in S, 0 otherwise.";
2509
2510static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002511string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002512{
Fred Drakeba096332000-07-09 07:04:36 +00002513 register const unsigned char *p
2514 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002515 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516 int cased;
2517
Guido van Rossum4c08d552000-03-10 22:55:18 +00002518 /* Shortcut for single character strings */
2519 if (PyString_GET_SIZE(self) == 1)
2520 return PyInt_FromLong(isupper(*p) != 0);
2521
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002522 /* Special case for empty strings */
2523 if (PyString_GET_SIZE(self) == 0)
2524 return PyInt_FromLong(0);
2525
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 e = p + PyString_GET_SIZE(self);
2527 cased = 0;
2528 for (; p < e; p++) {
2529 if (islower(*p))
2530 return PyInt_FromLong(0);
2531 else if (!cased && isupper(*p))
2532 cased = 1;
2533 }
2534 return PyInt_FromLong(cased);
2535}
2536
2537
2538static char istitle__doc__[] =
2539"S.istitle() -> int\n\
2540\n\
2541Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2542may only follow uncased characters and lowercase characters only cased\n\
2543ones. Return 0 otherwise.";
2544
2545static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002546string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547{
Fred Drakeba096332000-07-09 07:04:36 +00002548 register const unsigned char *p
2549 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002550 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002551 int cased, previous_is_cased;
2552
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553 /* Shortcut for single character strings */
2554 if (PyString_GET_SIZE(self) == 1)
2555 return PyInt_FromLong(isupper(*p) != 0);
2556
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002557 /* Special case for empty strings */
2558 if (PyString_GET_SIZE(self) == 0)
2559 return PyInt_FromLong(0);
2560
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 e = p + PyString_GET_SIZE(self);
2562 cased = 0;
2563 previous_is_cased = 0;
2564 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002565 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002566
2567 if (isupper(ch)) {
2568 if (previous_is_cased)
2569 return PyInt_FromLong(0);
2570 previous_is_cased = 1;
2571 cased = 1;
2572 }
2573 else if (islower(ch)) {
2574 if (!previous_is_cased)
2575 return PyInt_FromLong(0);
2576 previous_is_cased = 1;
2577 cased = 1;
2578 }
2579 else
2580 previous_is_cased = 0;
2581 }
2582 return PyInt_FromLong(cased);
2583}
2584
2585
2586static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002587"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002588\n\
2589Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002590Line breaks are not included in the resulting list unless keepends\n\
2591is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002592
2593#define SPLIT_APPEND(data, left, right) \
2594 str = PyString_FromStringAndSize(data + left, right - left); \
2595 if (!str) \
2596 goto onError; \
2597 if (PyList_Append(list, str)) { \
2598 Py_DECREF(str); \
2599 goto onError; \
2600 } \
2601 else \
2602 Py_DECREF(str);
2603
2604static PyObject*
2605string_splitlines(PyStringObject *self, PyObject *args)
2606{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002607 register int i;
2608 register int j;
2609 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002610 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 PyObject *list;
2612 PyObject *str;
2613 char *data;
2614
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002615 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002616 return NULL;
2617
2618 data = PyString_AS_STRING(self);
2619 len = PyString_GET_SIZE(self);
2620
Guido van Rossum4c08d552000-03-10 22:55:18 +00002621 list = PyList_New(0);
2622 if (!list)
2623 goto onError;
2624
2625 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002626 int eol;
2627
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 /* Find a line and append it */
2629 while (i < len && data[i] != '\n' && data[i] != '\r')
2630 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631
2632 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002633 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002634 if (i < len) {
2635 if (data[i] == '\r' && i + 1 < len &&
2636 data[i+1] == '\n')
2637 i += 2;
2638 else
2639 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002640 if (keepends)
2641 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002643 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002644 j = i;
2645 }
2646 if (j < len) {
2647 SPLIT_APPEND(data, j, len);
2648 }
2649
2650 return list;
2651
2652 onError:
2653 Py_DECREF(list);
2654 return NULL;
2655}
2656
2657#undef SPLIT_APPEND
2658
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002659
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002660static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002661string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002662 /* Counterparts of the obsolete stropmodule functions; except
2663 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002664 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2665 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2666 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2667 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2668 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2669 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2670 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2671 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2672 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2673 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2674 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2675 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2676 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2677 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2678 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2679 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2680 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2681 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2682 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2683 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2684 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2685 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2686 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2687 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2688 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2689 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2690 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2691 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2692 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2693 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2694 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2695 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2696 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002697#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002698 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002699#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002700 {NULL, NULL} /* sentinel */
2701};
2702
Guido van Rossumae960af2001-08-30 03:11:59 +00002703staticforward PyObject *
2704str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2705
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002706static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002707string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002708{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002709 PyObject *x = NULL;
2710 static char *kwlist[] = {"object", 0};
2711
Guido van Rossumae960af2001-08-30 03:11:59 +00002712 if (type != &PyString_Type)
2713 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002714 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2715 return NULL;
2716 if (x == NULL)
2717 return PyString_FromString("");
2718 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002719}
2720
Guido van Rossumae960af2001-08-30 03:11:59 +00002721static PyObject *
2722str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2723{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002724 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002725 int n;
2726
2727 assert(PyType_IsSubtype(type, &PyString_Type));
2728 tmp = string_new(&PyString_Type, args, kwds);
2729 if (tmp == NULL)
2730 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002731 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002732 n = PyString_GET_SIZE(tmp);
2733 pnew = type->tp_alloc(type, n);
2734 if (pnew != NULL) {
2735 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2736#ifdef CACHE_HASH
2737 ((PyStringObject *)pnew)->ob_shash =
2738 ((PyStringObject *)tmp)->ob_shash;
2739#endif
2740#ifdef INTERN_STRINGS
2741 ((PyStringObject *)pnew)->ob_sinterned =
2742 ((PyStringObject *)tmp)->ob_sinterned;
2743#endif
2744 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002745 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002746 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002747}
2748
Tim Peters6d6c1a32001-08-02 04:15:00 +00002749static char string_doc[] =
2750"str(object) -> string\n\
2751\n\
2752Return a nice string representation of the object.\n\
2753If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002754
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002755PyTypeObject PyString_Type = {
2756 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002757 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002758 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002759 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002760 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002761 (destructor)string_dealloc, /* tp_dealloc */
2762 (printfunc)string_print, /* tp_print */
2763 0, /* tp_getattr */
2764 0, /* tp_setattr */
2765 0, /* tp_compare */
2766 (reprfunc)string_repr, /* tp_repr */
2767 0, /* tp_as_number */
2768 &string_as_sequence, /* tp_as_sequence */
2769 0, /* tp_as_mapping */
2770 (hashfunc)string_hash, /* tp_hash */
2771 0, /* tp_call */
2772 (reprfunc)string_str, /* tp_str */
2773 PyObject_GenericGetAttr, /* tp_getattro */
2774 0, /* tp_setattro */
2775 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002776 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002777 string_doc, /* tp_doc */
2778 0, /* tp_traverse */
2779 0, /* tp_clear */
2780 (richcmpfunc)string_richcompare, /* tp_richcompare */
2781 0, /* tp_weaklistoffset */
2782 0, /* tp_iter */
2783 0, /* tp_iternext */
2784 string_methods, /* tp_methods */
2785 0, /* tp_members */
2786 0, /* tp_getset */
2787 0, /* tp_base */
2788 0, /* tp_dict */
2789 0, /* tp_descr_get */
2790 0, /* tp_descr_set */
2791 0, /* tp_dictoffset */
2792 0, /* tp_init */
2793 0, /* tp_alloc */
2794 string_new, /* tp_new */
Guido van Rossum9475a232001-10-05 20:51:39 +00002795 _PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002796};
2797
2798void
Fred Drakeba096332000-07-09 07:04:36 +00002799PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002800{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002801 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002802 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002804 if (w == NULL || !PyString_Check(*pv)) {
2805 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002806 *pv = NULL;
2807 return;
2808 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002809 v = string_concat((PyStringObject *) *pv, w);
2810 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002811 *pv = v;
2812}
2813
Guido van Rossum013142a1994-08-30 08:19:36 +00002814void
Fred Drakeba096332000-07-09 07:04:36 +00002815PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002816{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002817 PyString_Concat(pv, w);
2818 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002819}
2820
2821
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002822/* The following function breaks the notion that strings are immutable:
2823 it changes the size of a string. We get away with this only if there
2824 is only one module referencing the object. You can also think of it
2825 as creating a new string object and destroying the old one, only
2826 more efficiently. In any case, don't use this if the string may
2827 already be known to some other part of the code... */
2828
2829int
Fred Drakeba096332000-07-09 07:04:36 +00002830_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002831{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002832 register PyObject *v;
2833 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002834 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002835 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002836 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002837 Py_DECREF(v);
2838 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002839 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002840 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002841 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002842#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002843 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002844#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002845 _Py_ForgetReference(v);
2846 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002847 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002848 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002849 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002850 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002851 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002852 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002853 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002854 _Py_NewReference(*pv);
2855 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002856 sv->ob_size = newsize;
2857 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002858 return 0;
2859}
Guido van Rossume5372401993-03-16 12:15:04 +00002860
2861/* Helpers for formatstring */
2862
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002863static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002864getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002865{
2866 int argidx = *p_argidx;
2867 if (argidx < arglen) {
2868 (*p_argidx)++;
2869 if (arglen < 0)
2870 return args;
2871 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002872 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002873 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002874 PyErr_SetString(PyExc_TypeError,
2875 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002876 return NULL;
2877}
2878
/* Format flag bits, one per flag character of a format specifier. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
2891
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002892static int
Fred Drakeba096332000-07-09 07:04:36 +00002893formatfloat(char *buf, size_t buflen, int flags,
2894 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002895{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002896 /* fmt = '%#.' + `prec` + `type`
2897 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002898 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002899 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002900 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002901 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002902 if (prec < 0)
2903 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002904 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2905 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002906 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2907 (flags&F_ALT) ? "#" : "",
2908 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002909 /* worst case length calc to ensure no buffer overrun:
2910 fmt = %#.<prec>g
2911 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002912 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002913 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2914 If prec=0 the effective precision is 1 (the leading digit is
2915 always given), therefore increase by one to 10+prec. */
2916 if (buflen <= (size_t)10 + (size_t)prec) {
2917 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002918 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002919 return -1;
2920 }
Tim Peters885d4572001-11-28 20:27:42 +00002921 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002922 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002923}
2924
Tim Peters38fd5b62000-09-21 05:43:11 +00002925/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2926 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2927 * Python's regular ints.
2928 * Return value: a new PyString*, or NULL if error.
2929 * . *pbuf is set to point into it,
2930 * *plen set to the # of chars following that.
2931 * Caller must decref it when done using pbuf.
2932 * The string starting at *pbuf is of the form
2933 * "-"? ("0x" | "0X")? digit+
2934 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002935 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002936 * There will be at least prec digits, zero-filled on the left if
2937 * necessary to get that many.
2938 * val object to be converted
2939 * flags bitmask of format flags; only F_ALT is looked at
2940 * prec minimum number of digits; 0-fill on left if needed
2941 * type a character in [duoxX]; u acts the same as d
2942 *
2943 * CAUTION: o, x and X conversions on regular ints can never
2944 * produce a '-' sign, but can for Python's unbounded ints.
2945 */
2946PyObject*
2947_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2948 char **pbuf, int *plen)
2949{
2950 PyObject *result = NULL;
2951 char *buf;
2952 int i;
2953 int sign; /* 1 if '-', else 0 */
2954 int len; /* number of characters */
2955 int numdigits; /* len == numnondigits + numdigits */
2956 int numnondigits = 0;
2957
2958 switch (type) {
2959 case 'd':
2960 case 'u':
2961 result = val->ob_type->tp_str(val);
2962 break;
2963 case 'o':
2964 result = val->ob_type->tp_as_number->nb_oct(val);
2965 break;
2966 case 'x':
2967 case 'X':
2968 numnondigits = 2;
2969 result = val->ob_type->tp_as_number->nb_hex(val);
2970 break;
2971 default:
2972 assert(!"'type' not in [duoxX]");
2973 }
2974 if (!result)
2975 return NULL;
2976
2977 /* To modify the string in-place, there can only be one reference. */
2978 if (result->ob_refcnt != 1) {
2979 PyErr_BadInternalCall();
2980 return NULL;
2981 }
2982 buf = PyString_AsString(result);
2983 len = PyString_Size(result);
2984 if (buf[len-1] == 'L') {
2985 --len;
2986 buf[len] = '\0';
2987 }
2988 sign = buf[0] == '-';
2989 numnondigits += sign;
2990 numdigits = len - numnondigits;
2991 assert(numdigits > 0);
2992
Tim Petersfff53252001-04-12 18:38:48 +00002993 /* Get rid of base marker unless F_ALT */
2994 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002995 /* Need to skip 0x, 0X or 0. */
2996 int skipped = 0;
2997 switch (type) {
2998 case 'o':
2999 assert(buf[sign] == '0');
3000 /* If 0 is only digit, leave it alone. */
3001 if (numdigits > 1) {
3002 skipped = 1;
3003 --numdigits;
3004 }
3005 break;
3006 case 'x':
3007 case 'X':
3008 assert(buf[sign] == '0');
3009 assert(buf[sign + 1] == 'x');
3010 skipped = 2;
3011 numnondigits -= 2;
3012 break;
3013 }
3014 if (skipped) {
3015 buf += skipped;
3016 len -= skipped;
3017 if (sign)
3018 buf[0] = '-';
3019 }
3020 assert(len == numnondigits + numdigits);
3021 assert(numdigits > 0);
3022 }
3023
3024 /* Fill with leading zeroes to meet minimum width. */
3025 if (prec > numdigits) {
3026 PyObject *r1 = PyString_FromStringAndSize(NULL,
3027 numnondigits + prec);
3028 char *b1;
3029 if (!r1) {
3030 Py_DECREF(result);
3031 return NULL;
3032 }
3033 b1 = PyString_AS_STRING(r1);
3034 for (i = 0; i < numnondigits; ++i)
3035 *b1++ = *buf++;
3036 for (i = 0; i < prec - numdigits; i++)
3037 *b1++ = '0';
3038 for (i = 0; i < numdigits; i++)
3039 *b1++ = *buf++;
3040 *b1 = '\0';
3041 Py_DECREF(result);
3042 result = r1;
3043 buf = PyString_AS_STRING(result);
3044 len = numnondigits + prec;
3045 }
3046
3047 /* Fix up case for hex conversions. */
3048 switch (type) {
3049 case 'x':
3050 /* Need to convert all upper case letters to lower case. */
3051 for (i = 0; i < len; i++)
3052 if (buf[i] >= 'A' && buf[i] <= 'F')
3053 buf[i] += 'a'-'A';
3054 break;
3055 case 'X':
3056 /* Need to convert 0x to 0X (and -0x to -0X). */
3057 if (buf[sign + 1] == 'x')
3058 buf[sign + 1] = 'X';
3059 break;
3060 }
3061 *pbuf = buf;
3062 *plen = len;
3063 return result;
3064}
3065
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003066static int
Fred Drakeba096332000-07-09 07:04:36 +00003067formatint(char *buf, size_t buflen, int flags,
3068 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003069{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003070 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003071 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3072 + 1 + 1 = 24 */
3073 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003074 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003075 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003076 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003077 if (prec < 0)
3078 prec = 1;
Tim Peters885d4572001-11-28 20:27:42 +00003079 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3080 (flags&F_ALT) ? "#" : "",
3081 prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003082 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003083 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003084 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003085 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003086 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003087 return -1;
3088 }
Tim Peters885d4572001-11-28 20:27:42 +00003089 PyOS_snprintf(buf, buflen, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003090 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3091 * but we want it (for consistency with other %#x conversions, and
3092 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003093 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3094 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3095 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003096 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003097 if (x == 0 &&
3098 (flags & F_ALT) &&
3099 (type == 'x' || type == 'X') &&
3100 buf[1] != (char)type) /* this last always true under std C */
3101 {
Tim Petersfff53252001-04-12 18:38:48 +00003102 memmove(buf+2, buf, strlen(buf) + 1);
3103 buf[0] = '0';
3104 buf[1] = (char)type;
3105 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003106 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003107}
3108
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003109static int
Fred Drakeba096332000-07-09 07:04:36 +00003110formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003111{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003112 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003113 if (PyString_Check(v)) {
3114 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003115 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003116 }
3117 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003118 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003119 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003120 }
3121 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003122 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003123}
3124
Guido van Rossum013142a1994-08-30 08:19:36 +00003125
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast cannot combine
   with neighbouring operators at the point of use.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003135
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003136PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003137PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003138{
3139 char *fmt, *res;
3140 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003141 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003142 PyObject *result, *orig_args;
3143#ifdef Py_USING_UNICODE
3144 PyObject *v, *w;
3145#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003146 PyObject *dict = NULL;
3147 if (format == NULL || !PyString_Check(format) || args == NULL) {
3148 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003149 return NULL;
3150 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003151 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003152 fmt = PyString_AS_STRING(format);
3153 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003154 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003155 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003156 if (result == NULL)
3157 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003158 res = PyString_AsString(result);
3159 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003160 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003161 argidx = 0;
3162 }
3163 else {
3164 arglen = -1;
3165 argidx = -2;
3166 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003167 if (args->ob_type->tp_as_mapping)
3168 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003169 while (--fmtcnt >= 0) {
3170 if (*fmt != '%') {
3171 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003172 rescnt = fmtcnt + 100;
3173 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003174 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003175 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003176 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003177 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003178 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003179 }
3180 *res++ = *fmt++;
3181 }
3182 else {
3183 /* Got a format specifier */
3184 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003185 int width = -1;
3186 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003187 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003188 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003189 PyObject *v = NULL;
3190 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003191 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003192 int sign;
3193 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003194 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003195#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003196 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003197 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003198#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003199
Guido van Rossumda9c2711996-12-05 21:58:58 +00003200 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003201 if (*fmt == '(') {
3202 char *keystart;
3203 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003204 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003205 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003206
3207 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003208 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003209 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003210 goto error;
3211 }
3212 ++fmt;
3213 --fmtcnt;
3214 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003215 /* Skip over balanced parentheses */
3216 while (pcount > 0 && --fmtcnt >= 0) {
3217 if (*fmt == ')')
3218 --pcount;
3219 else if (*fmt == '(')
3220 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003221 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003222 }
3223 keylen = fmt - keystart - 1;
3224 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003225 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003226 "incomplete format key");
3227 goto error;
3228 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003229 key = PyString_FromStringAndSize(keystart,
3230 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003231 if (key == NULL)
3232 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003233 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003234 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003235 args_owned = 0;
3236 }
3237 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003238 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003239 if (args == NULL) {
3240 goto error;
3241 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003242 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003243 arglen = -1;
3244 argidx = -2;
3245 }
Guido van Rossume5372401993-03-16 12:15:04 +00003246 while (--fmtcnt >= 0) {
3247 switch (c = *fmt++) {
3248 case '-': flags |= F_LJUST; continue;
3249 case '+': flags |= F_SIGN; continue;
3250 case ' ': flags |= F_BLANK; continue;
3251 case '#': flags |= F_ALT; continue;
3252 case '0': flags |= F_ZERO; continue;
3253 }
3254 break;
3255 }
3256 if (c == '*') {
3257 v = getnextarg(args, arglen, &argidx);
3258 if (v == NULL)
3259 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003260 if (!PyInt_Check(v)) {
3261 PyErr_SetString(PyExc_TypeError,
3262 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003263 goto error;
3264 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003265 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003266 if (width < 0) {
3267 flags |= F_LJUST;
3268 width = -width;
3269 }
Guido van Rossume5372401993-03-16 12:15:04 +00003270 if (--fmtcnt >= 0)
3271 c = *fmt++;
3272 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003273 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003274 width = c - '0';
3275 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003276 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003277 if (!isdigit(c))
3278 break;
3279 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003280 PyErr_SetString(
3281 PyExc_ValueError,
3282 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003283 goto error;
3284 }
3285 width = width*10 + (c - '0');
3286 }
3287 }
3288 if (c == '.') {
3289 prec = 0;
3290 if (--fmtcnt >= 0)
3291 c = *fmt++;
3292 if (c == '*') {
3293 v = getnextarg(args, arglen, &argidx);
3294 if (v == NULL)
3295 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003296 if (!PyInt_Check(v)) {
3297 PyErr_SetString(
3298 PyExc_TypeError,
3299 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003300 goto error;
3301 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003302 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003303 if (prec < 0)
3304 prec = 0;
3305 if (--fmtcnt >= 0)
3306 c = *fmt++;
3307 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003308 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003309 prec = c - '0';
3310 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003311 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003312 if (!isdigit(c))
3313 break;
3314 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003315 PyErr_SetString(
3316 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003317 "prec too big");
3318 goto error;
3319 }
3320 prec = prec*10 + (c - '0');
3321 }
3322 }
3323 } /* prec */
3324 if (fmtcnt >= 0) {
3325 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003326 if (--fmtcnt >= 0)
3327 c = *fmt++;
3328 }
3329 }
3330 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003331 PyErr_SetString(PyExc_ValueError,
3332 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003333 goto error;
3334 }
3335 if (c != '%') {
3336 v = getnextarg(args, arglen, &argidx);
3337 if (v == NULL)
3338 goto error;
3339 }
3340 sign = 0;
3341 fill = ' ';
3342 switch (c) {
3343 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003344 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003345 len = 1;
3346 break;
3347 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003348 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003349#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003350 if (PyUnicode_Check(v)) {
3351 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003352 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003353 goto unicode;
3354 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003355#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003356 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003357 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003358 else
3359 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003360 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003361 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003362 if (!PyString_Check(temp)) {
3363 PyErr_SetString(PyExc_TypeError,
3364 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003365 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003366 goto error;
3367 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003368 pbuf = PyString_AS_STRING(temp);
3369 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003370 if (prec >= 0 && len > prec)
3371 len = prec;
3372 break;
3373 case 'i':
3374 case 'd':
3375 case 'u':
3376 case 'o':
3377 case 'x':
3378 case 'X':
3379 if (c == 'i')
3380 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003381 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003382 temp = _PyString_FormatLong(v, flags,
3383 prec, c, &pbuf, &len);
3384 if (!temp)
3385 goto error;
3386 /* unbounded ints can always produce
3387 a sign character! */
3388 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003389 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003390 else {
3391 pbuf = formatbuf;
3392 len = formatint(pbuf, sizeof(formatbuf),
3393 flags, prec, c, v);
3394 if (len < 0)
3395 goto error;
3396 /* only d conversion is signed */
3397 sign = c == 'd';
3398 }
3399 if (flags & F_ZERO)
3400 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003401 break;
3402 case 'e':
3403 case 'E':
3404 case 'f':
3405 case 'g':
3406 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003407 pbuf = formatbuf;
3408 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003409 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003410 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003411 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003412 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003413 fill = '0';
3414 break;
3415 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003416 pbuf = formatbuf;
3417 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003418 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003419 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003420 break;
3421 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003422 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003423 "unsupported format character '%c' (0x%x) "
3424 "at index %i",
3425 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003426 goto error;
3427 }
3428 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003429 if (*pbuf == '-' || *pbuf == '+') {
3430 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003431 len--;
3432 }
3433 else if (flags & F_SIGN)
3434 sign = '+';
3435 else if (flags & F_BLANK)
3436 sign = ' ';
3437 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003438 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003439 }
3440 if (width < len)
3441 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003442 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003443 reslen -= rescnt;
3444 rescnt = width + fmtcnt + 100;
3445 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003446 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003447 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003448 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003449 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003450 }
3451 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003452 if (fill != ' ')
3453 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003454 rescnt--;
3455 if (width > len)
3456 width--;
3457 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003458 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3459 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003460 assert(pbuf[1] == c);
3461 if (fill != ' ') {
3462 *res++ = *pbuf++;
3463 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003464 }
Tim Petersfff53252001-04-12 18:38:48 +00003465 rescnt -= 2;
3466 width -= 2;
3467 if (width < 0)
3468 width = 0;
3469 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003470 }
3471 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003472 do {
3473 --rescnt;
3474 *res++ = fill;
3475 } while (--width > len);
3476 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003477 if (fill == ' ') {
3478 if (sign)
3479 *res++ = sign;
3480 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003481 (c == 'x' || c == 'X')) {
3482 assert(pbuf[0] == '0');
3483 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003484 *res++ = *pbuf++;
3485 *res++ = *pbuf++;
3486 }
3487 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003488 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003489 res += len;
3490 rescnt -= len;
3491 while (--width >= len) {
3492 --rescnt;
3493 *res++ = ' ';
3494 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003495 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003496 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003497 "not all arguments converted");
3498 goto error;
3499 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003500 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003501 } /* '%' */
3502 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003503 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003504 PyErr_SetString(PyExc_TypeError,
3505 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003506 goto error;
3507 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003508 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003510 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003511 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003512 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003513
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003514#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003515 unicode:
3516 if (args_owned) {
3517 Py_DECREF(args);
3518 args_owned = 0;
3519 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003520 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003521 if (PyTuple_Check(orig_args) && argidx > 0) {
3522 PyObject *v;
3523 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3524 v = PyTuple_New(n);
3525 if (v == NULL)
3526 goto error;
3527 while (--n >= 0) {
3528 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3529 Py_INCREF(w);
3530 PyTuple_SET_ITEM(v, n, w);
3531 }
3532 args = v;
3533 } else {
3534 Py_INCREF(orig_args);
3535 args = orig_args;
3536 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003537 args_owned = 1;
3538 /* Take what we have of the result and let the Unicode formatting
3539 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003540 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003541 if (_PyString_Resize(&result, rescnt))
3542 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003543 fmtcnt = PyString_GET_SIZE(format) - \
3544 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003545 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3546 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003547 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003548 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003549 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003550 if (v == NULL)
3551 goto error;
3552 /* Paste what we have (result) to what the Unicode formatting
3553 function returned (v) and return the result (or error) */
3554 w = PyUnicode_Concat(result, v);
3555 Py_DECREF(result);
3556 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003557 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003558 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003559#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003560
Guido van Rossume5372401993-03-16 12:15:04 +00003561 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003562 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003563 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003564 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003565 }
Guido van Rossume5372401993-03-16 12:15:04 +00003566 return NULL;
3567}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003568
3569
3570#ifdef INTERN_STRINGS
3571
Barry Warsaw4df762f2000-08-16 23:41:01 +00003572/* This dictionary will leak at PyString_Fini() time. That's acceptable
3573 * because PyString_Fini() specifically frees interned strings that are
3574 * only referenced by this dictionary. The CVS log entry for revision 2.45
3575 * says:
3576 *
3577 * Change the Fini function to only remove otherwise unreferenced
3578 * strings from the interned table. There are references in
3579 * hard-to-find static variables all over the interpreter, and it's not
3580 * worth trying to get rid of all those; but "uninterning" isn't fair
3581 * either and may cause subtle failures later -- so we have to keep them
3582 * in the interned table.
3583 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003584static PyObject *interned;
3585
3586void
Fred Drakeba096332000-07-09 07:04:36 +00003587PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003588{
3589 register PyStringObject *s = (PyStringObject *)(*p);
3590 PyObject *t;
3591 if (s == NULL || !PyString_Check(s))
3592 Py_FatalError("PyString_InternInPlace: strings only please!");
3593 if ((t = s->ob_sinterned) != NULL) {
3594 if (t == (PyObject *)s)
3595 return;
3596 Py_INCREF(t);
3597 *p = t;
3598 Py_DECREF(s);
3599 return;
3600 }
3601 if (interned == NULL) {
3602 interned = PyDict_New();
3603 if (interned == NULL)
3604 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003605 }
3606 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3607 Py_INCREF(t);
3608 *p = s->ob_sinterned = t;
3609 Py_DECREF(s);
3610 return;
3611 }
Tim Peters111f6092001-09-12 07:54:51 +00003612 /* Ensure that only true string objects appear in the intern dict,
3613 and as the value of ob_sinterned. */
3614 if (PyString_CheckExact(s)) {
3615 t = (PyObject *)s;
3616 if (PyDict_SetItem(interned, t, t) == 0) {
3617 s->ob_sinterned = t;
3618 return;
3619 }
3620 }
3621 else {
3622 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3623 PyString_GET_SIZE(s));
3624 if (t != NULL) {
3625 if (PyDict_SetItem(interned, t, t) == 0) {
3626 *p = s->ob_sinterned = t;
3627 Py_DECREF(s);
3628 return;
3629 }
3630 Py_DECREF(t);
3631 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003632 }
3633 PyErr_Clear();
3634}
3635
3636
3637PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003638PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003639{
3640 PyObject *s = PyString_FromString(cp);
3641 if (s == NULL)
3642 return NULL;
3643 PyString_InternInPlace(&s);
3644 return s;
3645}
3646
3647#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003648
3649void
Fred Drakeba096332000-07-09 07:04:36 +00003650PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003651{
3652 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003653 for (i = 0; i < UCHAR_MAX + 1; i++) {
3654 Py_XDECREF(characters[i]);
3655 characters[i] = NULL;
3656 }
3657#ifndef DONT_SHARE_SHORT_STRINGS
3658 Py_XDECREF(nullstring);
3659 nullstring = NULL;
3660#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003661#ifdef INTERN_STRINGS
3662 if (interned) {
3663 int pos, changed;
3664 PyObject *key, *value;
3665 do {
3666 changed = 0;
3667 pos = 0;
3668 while (PyDict_Next(interned, &pos, &key, &value)) {
3669 if (key->ob_refcnt == 2 && key == value) {
3670 PyDict_DelItem(interned, key);
3671 changed = 1;
3672 }
3673 }
3674 } while (changed);
3675 }
3676#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003677}
Barry Warsawa903ad982001-02-23 16:40:48 +00003678
3679#ifdef INTERN_STRINGS
3680void _Py_ReleaseInternedStrings(void)
3681{
3682 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003683 fprintf(stderr, "releasing interned strings\n");
3684 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003685 Py_DECREF(interned);
3686 interned = NULL;
3687 }
3688}
3689#endif /* INTERN_STRINGS */