blob: e29be5a07007bfc696b5783f0b57180d845def25 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
164 ;
165
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
172
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
195 */
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
207 }
208 } else
209 n++;
210 }
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
216
217 s = PyString_AsString(string);
218
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 }
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
241 }
242
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000272 /* %p is ill-defined: ensure leading 0x. */
273 if (s[1] == 'X')
274 s[1] = 'x';
275 else if (s[1] != 'x') {
276 memmove(s+2, s, strlen(s)+1);
277 s[0] = '0';
278 s[1] = 'x';
279 }
Barry Warsawdadace02001-08-24 18:32:06 +0000280 s += strlen(s);
281 break;
282 case '%':
283 *s++ = '%';
284 break;
285 default:
286 strcpy(s, p);
287 s += strlen(s);
288 goto end;
289 }
290 } else
291 *s++ = *f;
292 }
293
294 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000295 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000296 return string;
297}
298
299PyObject *
300PyString_FromFormat(const char *format, ...)
301{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000302 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000303 va_list vargs;
304
305#ifdef HAVE_STDARG_PROTOTYPES
306 va_start(vargs, format);
307#else
308 va_start(vargs);
309#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 ret = PyString_FromFormatV(format, vargs);
311 va_end(vargs);
312 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313}
314
315
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000316PyObject *PyString_Decode(const char *s,
317 int size,
318 const char *encoding,
319 const char *errors)
320{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000321 PyObject *v, *str;
322
323 str = PyString_FromStringAndSize(s, size);
324 if (str == NULL)
325 return NULL;
326 v = PyString_AsDecodedString(str, encoding, errors);
327 Py_DECREF(str);
328 return v;
329}
330
331PyObject *PyString_AsDecodedObject(PyObject *str,
332 const char *encoding,
333 const char *errors)
334{
335 PyObject *v;
336
337 if (!PyString_Check(str)) {
338 PyErr_BadArgument();
339 goto onError;
340 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000341
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000342 if (encoding == NULL) {
343#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000344 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000345#else
346 PyErr_SetString(PyExc_ValueError, "no encoding specified");
347 goto onError;
348#endif
349 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350
351 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000352 v = PyCodec_Decode(str, encoding, errors);
353 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000355
356 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000359 return NULL;
360}
361
362PyObject *PyString_AsDecodedString(PyObject *str,
363 const char *encoding,
364 const char *errors)
365{
366 PyObject *v;
367
368 v = PyString_AsDecodedObject(str, encoding, errors);
369 if (v == NULL)
370 goto onError;
371
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000372#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 /* Convert Unicode to a string using the default encoding */
374 if (PyUnicode_Check(v)) {
375 PyObject *temp = v;
376 v = PyUnicode_AsEncodedString(v, NULL, NULL);
377 Py_DECREF(temp);
378 if (v == NULL)
379 goto onError;
380 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000382 if (!PyString_Check(v)) {
383 PyErr_Format(PyExc_TypeError,
384 "decoder did not return a string object (type=%.400s)",
385 v->ob_type->tp_name);
386 Py_DECREF(v);
387 goto onError;
388 }
389
390 return v;
391
392 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 return NULL;
394}
395
396PyObject *PyString_Encode(const char *s,
397 int size,
398 const char *encoding,
399 const char *errors)
400{
401 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 str = PyString_FromStringAndSize(s, size);
404 if (str == NULL)
405 return NULL;
406 v = PyString_AsEncodedString(str, encoding, errors);
407 Py_DECREF(str);
408 return v;
409}
410
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000412 const char *encoding,
413 const char *errors)
414{
415 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000416
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 if (!PyString_Check(str)) {
418 PyErr_BadArgument();
419 goto onError;
420 }
421
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422 if (encoding == NULL) {
423#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000425#else
426 PyErr_SetString(PyExc_ValueError, "no encoding specified");
427 goto onError;
428#endif
429 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430
431 /* Encode via the codec registry */
432 v = PyCodec_Encode(str, encoding, errors);
433 if (v == NULL)
434 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000435
436 return v;
437
438 onError:
439 return NULL;
440}
441
442PyObject *PyString_AsEncodedString(PyObject *str,
443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v;
447
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000448 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449 if (v == NULL)
450 goto onError;
451
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 /* Convert Unicode to a string using the default encoding */
454 if (PyUnicode_Check(v)) {
455 PyObject *temp = v;
456 v = PyUnicode_AsEncodedString(v, NULL, NULL);
457 Py_DECREF(temp);
458 if (v == NULL)
459 goto onError;
460 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(v)) {
463 PyErr_Format(PyExc_TypeError,
464 "encoder did not return a string object (type=%.400s)",
465 v->ob_type->tp_name);
466 Py_DECREF(v);
467 goto onError;
468 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000469
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000471
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 onError:
473 return NULL;
474}
475
Guido van Rossum234f9421993-06-17 12:35:49 +0000476static void
Fred Drakeba096332000-07-09 07:04:36 +0000477string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000478{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000479 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000480}
481
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000482static int
483string_getsize(register PyObject *op)
484{
485 char *s;
486 int len;
487 if (PyString_AsStringAndSize(op, &s, &len))
488 return -1;
489 return len;
490}
491
492static /*const*/ char *
493string_getbuffer(register PyObject *op)
494{
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return NULL;
499 return s;
500}
501
Guido van Rossumd7047b31995-01-02 19:07:15 +0000502int
Fred Drakeba096332000-07-09 07:04:36 +0000503PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000505 if (!PyString_Check(op))
506 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508}
509
510/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000511PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000512{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000513 if (!PyString_Check(op))
514 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516}
517
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518int
519PyString_AsStringAndSize(register PyObject *obj,
520 register char **s,
521 register int *len)
522{
523 if (s == NULL) {
524 PyErr_BadInternalCall();
525 return -1;
526 }
527
528 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000529#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530 if (PyUnicode_Check(obj)) {
531 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
532 if (obj == NULL)
533 return -1;
534 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535 else
536#endif
537 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 PyErr_Format(PyExc_TypeError,
539 "expected string or Unicode object, "
540 "%.200s found", obj->ob_type->tp_name);
541 return -1;
542 }
543 }
544
545 *s = PyString_AS_STRING(obj);
546 if (len != NULL)
547 *len = PyString_GET_SIZE(obj);
548 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
549 PyErr_SetString(PyExc_TypeError,
550 "expected string without null bytes");
551 return -1;
552 }
553 return 0;
554}
555
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556/* Methods */
557
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000560{
561 int i;
562 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000563 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000567 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569
Thomas Wouters7e474022000-07-16 12:04:32 +0000570 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000571 quote = '\'';
572 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
573 quote = '"';
574
575 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576 for (i = 0; i < op->ob_size; i++) {
577 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000578 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000580 else if (c == '\t')
581 fprintf(fp, "\\t");
582 else if (c == '\n')
583 fprintf(fp, "\\n");
584 else if (c == '\r')
585 fprintf(fp, "\\r");
586 else if (c < ' ' || c >= 0x7f)
587 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000589 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593}
594
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000596string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000598 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
599 PyObject *v;
600 if (newsize > INT_MAX) {
601 PyErr_SetString(PyExc_OverflowError,
602 "string is too large to make repr");
603 }
604 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000606 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 }
608 else {
609 register int i;
610 register char c;
611 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000612 int quote;
613
Thomas Wouters7e474022000-07-16 12:04:32 +0000614 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000615 quote = '\'';
616 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
617 quote = '"';
618
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000619 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 for (i = 0; i < op->ob_size; i++) {
622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
631 else if (c < ' ' || c >= 0x7f) {
632 sprintf(p, "\\x%02x", c & 0xff);
633 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 }
635 else
636 *p++ = c;
637 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000638 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000640 _PyString_Resize(
641 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000642 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644}
645
Guido van Rossum189f1df2001-05-01 16:51:53 +0000646static PyObject *
647string_str(PyObject *s)
648{
649 Py_INCREF(s);
650 return s;
651}
652
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653static int
Fred Drakeba096332000-07-09 07:04:36 +0000654string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655{
656 return a->ob_size;
657}
658
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000660string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661{
662 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000663 register PyStringObject *op;
664 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000665#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000666 if (PyUnicode_Check(bb))
667 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000668#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000669 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000670 "cannot add type \"%.200s\" to string",
671 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672 return NULL;
673 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000676 if ((a->ob_size == 0 || b->ob_size == 0) &&
677 PyString_CheckExact(a) && PyString_CheckExact(b)) {
678 if (a->ob_size == 0) {
679 Py_INCREF(bb);
680 return bb;
681 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000682 Py_INCREF(a);
683 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000684 }
685 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000686 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000687 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000688 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000689 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000690 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000691 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000692#ifdef CACHE_HASH
693 op->ob_shash = -1;
694#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000695#ifdef INTERN_STRINGS
696 op->ob_sinterned = NULL;
697#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000698 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
699 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
700 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702#undef b
703}
704
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000705static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000706string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707{
708 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000709 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000710 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000711 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712 if (n < 0)
713 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
716 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000718 if (n && size / n != a->ob_size) {
719 PyErr_SetString(PyExc_OverflowError,
720 "repeated string is too long");
721 return NULL;
722 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000723 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 Py_INCREF(a);
725 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726 }
Tim Peters8f422462000-09-09 06:13:41 +0000727 nbytes = size * sizeof(char);
728 if (nbytes / sizeof(char) != (size_t)size ||
729 nbytes + sizeof(PyStringObject) <= nbytes) {
730 PyErr_SetString(PyExc_OverflowError,
731 "repeated string is too long");
732 return NULL;
733 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000734 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000735 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000736 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000738 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000739#ifdef CACHE_HASH
740 op->ob_shash = -1;
741#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000742#ifdef INTERN_STRINGS
743 op->ob_sinterned = NULL;
744#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000745 for (i = 0; i < size; i += a->ob_size)
746 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
747 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000748 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749}
750
751/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
752
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000753static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000754string_slice(register PyStringObject *a, register int i, register int j)
755 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756{
757 if (i < 0)
758 i = 0;
759 if (j < 0)
760 j = 0; /* Avoid signed/unsigned bug in next line */
761 if (j > a->ob_size)
762 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000763 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
764 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000765 Py_INCREF(a);
766 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767 }
768 if (j < i)
769 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000770 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771}
772
Guido van Rossum9284a572000-03-07 15:53:43 +0000773static int
Fred Drakeba096332000-07-09 07:04:36 +0000774string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000775{
776 register char *s, *end;
777 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000778#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000779 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000780 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000781#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000782 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000783 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000784 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000785 return -1;
786 }
787 c = PyString_AsString(el)[0];
788 s = PyString_AsString(a);
789 end = s + PyString_Size(a);
790 while (s < end) {
791 if (c == *s++)
792 return 1;
793 }
794 return 0;
795}
796
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000797static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000798string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000800 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000801 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000802 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000803 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804 return NULL;
805 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000806 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000807 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000808 if (v == NULL)
809 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000810 else {
811#ifdef COUNT_ALLOCS
812 one_strings++;
813#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000814 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000815 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000816 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817}
818
Martin v. Löwiscd353062001-05-24 16:56:35 +0000819static PyObject*
820string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000821{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000822 int c;
823 int len_a, len_b;
824 int min_len;
825 PyObject *result;
826
Guido van Rossumbb77e682001-09-24 16:51:54 +0000827 /* May sure both arguments use string comparison.
828 This implies PyString_Check(a) && PyString_Check(b). */
829 if (a->ob_type->tp_richcompare != (richcmpfunc)string_richcompare ||
830 b->ob_type->tp_richcompare != (richcmpfunc)string_richcompare) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000831 result = Py_NotImplemented;
832 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000833 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000834 if (a == b) {
835 switch (op) {
836 case Py_EQ:case Py_LE:case Py_GE:
837 result = Py_True;
838 goto out;
839 case Py_NE:case Py_LT:case Py_GT:
840 result = Py_False;
841 goto out;
842 }
843 }
844 if (op == Py_EQ) {
845 /* Supporting Py_NE here as well does not save
846 much time, since Py_NE is rarely used. */
847 if (a->ob_size == b->ob_size
848 && (a->ob_sval[0] == b->ob_sval[0]
849 && memcmp(a->ob_sval, b->ob_sval,
850 a->ob_size) == 0)) {
851 result = Py_True;
852 } else {
853 result = Py_False;
854 }
855 goto out;
856 }
857 len_a = a->ob_size; len_b = b->ob_size;
858 min_len = (len_a < len_b) ? len_a : len_b;
859 if (min_len > 0) {
860 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
861 if (c==0)
862 c = memcmp(a->ob_sval, b->ob_sval, min_len);
863 }else
864 c = 0;
865 if (c == 0)
866 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
867 switch (op) {
868 case Py_LT: c = c < 0; break;
869 case Py_LE: c = c <= 0; break;
870 case Py_EQ: assert(0); break; /* unreachable */
871 case Py_NE: c = c != 0; break;
872 case Py_GT: c = c > 0; break;
873 case Py_GE: c = c >= 0; break;
874 default:
875 result = Py_NotImplemented;
876 goto out;
877 }
878 result = c ? Py_True : Py_False;
879 out:
880 Py_INCREF(result);
881 return result;
882}
883
884int
885_PyString_Eq(PyObject *o1, PyObject *o2)
886{
887 PyStringObject *a, *b;
888 a = (PyStringObject*)o1;
889 b = (PyStringObject*)o2;
890 return a->ob_size == b->ob_size
891 && *a->ob_sval == *b->ob_sval
892 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893}
894
Guido van Rossum9bfef441993-03-29 10:43:31 +0000895static long
Fred Drakeba096332000-07-09 07:04:36 +0000896string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000897{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000898 register int len;
899 register unsigned char *p;
900 register long x;
901
902#ifdef CACHE_HASH
903 if (a->ob_shash != -1)
904 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000905#ifdef INTERN_STRINGS
906 if (a->ob_sinterned != NULL)
907 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000909#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000910#endif
911 len = a->ob_size;
912 p = (unsigned char *) a->ob_sval;
913 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000914 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000915 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000916 x ^= a->ob_size;
917 if (x == -1)
918 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000919#ifdef CACHE_HASH
920 a->ob_shash = x;
921#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000922 return x;
923}
924
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000925static int
Fred Drakeba096332000-07-09 07:04:36 +0000926string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000927{
928 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000929 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000930 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000931 return -1;
932 }
933 *ptr = (void *)self->ob_sval;
934 return self->ob_size;
935}
936
937static int
Fred Drakeba096332000-07-09 07:04:36 +0000938string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000939{
Guido van Rossum045e6881997-09-08 18:30:11 +0000940 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000941 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000942 return -1;
943}
944
945static int
Fred Drakeba096332000-07-09 07:04:36 +0000946string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000947{
948 if ( lenp )
949 *lenp = self->ob_size;
950 return 1;
951}
952
Guido van Rossum1db70701998-10-08 02:18:52 +0000953static int
Fred Drakeba096332000-07-09 07:04:36 +0000954string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000955{
956 if ( index != 0 ) {
957 PyErr_SetString(PyExc_SystemError,
958 "accessing non-existent string segment");
959 return -1;
960 }
961 *ptr = self->ob_sval;
962 return self->ob_size;
963}
964
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000966 (inquiry)string_length, /*sq_length*/
967 (binaryfunc)string_concat, /*sq_concat*/
968 (intargfunc)string_repeat, /*sq_repeat*/
969 (intargfunc)string_item, /*sq_item*/
970 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000971 0, /*sq_ass_item*/
972 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000973 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974};
975
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000976static PyBufferProcs string_as_buffer = {
977 (getreadbufferproc)string_buffer_getreadbuf,
978 (getwritebufferproc)string_buffer_getwritebuf,
979 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000980 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000981};
982
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000983
984
/* Selectors passed to do_strip() as striptype: which end(s) of the
   string have whitespace removed. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
988
989
990static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000991split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000992{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000993 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000994 PyObject* item;
995 PyObject *list = PyList_New(0);
996
997 if (list == NULL)
998 return NULL;
999
Guido van Rossum4c08d552000-03-10 22:55:18 +00001000 for (i = j = 0; i < len; ) {
1001 while (i < len && isspace(Py_CHARMASK(s[i])))
1002 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001003 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001004 while (i < len && !isspace(Py_CHARMASK(s[i])))
1005 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001007 if (maxsplit-- <= 0)
1008 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001009 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1010 if (item == NULL)
1011 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001012 err = PyList_Append(list, item);
1013 Py_DECREF(item);
1014 if (err < 0)
1015 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001016 while (i < len && isspace(Py_CHARMASK(s[i])))
1017 i++;
1018 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001019 }
1020 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001021 if (j < len) {
1022 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1023 if (item == NULL)
1024 goto finally;
1025 err = PyList_Append(list, item);
1026 Py_DECREF(item);
1027 if (err < 0)
1028 goto finally;
1029 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001030 return list;
1031 finally:
1032 Py_DECREF(list);
1033 return NULL;
1034}
1035
1036
1037static char split__doc__[] =
1038"S.split([sep [,maxsplit]]) -> list of strings\n\
1039\n\
1040Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001041delimiter string. If maxsplit is given, at most maxsplit\n\
1042splits are done. If sep is not specified, any whitespace string\n\
1043is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001044
1045static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001046string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001047{
1048 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001049 int maxsplit = -1;
1050 const char *s = PyString_AS_STRING(self), *sub;
1051 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001055 if (maxsplit < 0)
1056 maxsplit = INT_MAX;
1057 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001058 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001059 if (PyString_Check(subobj)) {
1060 sub = PyString_AS_STRING(subobj);
1061 n = PyString_GET_SIZE(subobj);
1062 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001063#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001064 else if (PyUnicode_Check(subobj))
1065 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001066#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001067 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1068 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001069 if (n == 0) {
1070 PyErr_SetString(PyExc_ValueError, "empty separator");
1071 return NULL;
1072 }
1073
1074 list = PyList_New(0);
1075 if (list == NULL)
1076 return NULL;
1077
1078 i = j = 0;
1079 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001080 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001081 if (maxsplit-- <= 0)
1082 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001083 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1084 if (item == NULL)
1085 goto fail;
1086 err = PyList_Append(list, item);
1087 Py_DECREF(item);
1088 if (err < 0)
1089 goto fail;
1090 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001091 }
1092 else
1093 i++;
1094 }
1095 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1096 if (item == NULL)
1097 goto fail;
1098 err = PyList_Append(list, item);
1099 Py_DECREF(item);
1100 if (err < 0)
1101 goto fail;
1102
1103 return list;
1104
1105 fail:
1106 Py_DECREF(list);
1107 return NULL;
1108}
1109
1110
1111static char join__doc__[] =
1112"S.join(sequence) -> string\n\
1113\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001114Return a string which is the concatenation of the strings in the\n\
1115sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116
1117static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001118string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001119{
1120 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001121 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001122 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001123 char *p;
1124 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001125 size_t sz = 0;
1126 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001127 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001128
Tim Peters19fe14e2001-01-19 03:03:47 +00001129 seq = PySequence_Fast(orig, "");
1130 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001131 if (PyErr_ExceptionMatches(PyExc_TypeError))
1132 PyErr_Format(PyExc_TypeError,
1133 "sequence expected, %.80s found",
1134 orig->ob_type->tp_name);
1135 return NULL;
1136 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001137
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001138 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001139 if (seqlen == 0) {
1140 Py_DECREF(seq);
1141 return PyString_FromString("");
1142 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001143 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001144 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001145 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1146 PyErr_Format(PyExc_TypeError,
1147 "sequence item 0: expected string,"
1148 " %.80s found",
1149 item->ob_type->tp_name);
1150 Py_DECREF(seq);
1151 return NULL;
1152 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001153 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001154 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001155 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001156 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001157
Tim Peters19fe14e2001-01-19 03:03:47 +00001158 /* There are at least two things to join. Do a pre-pass to figure out
1159 * the total amount of space we'll need (sz), see whether any argument
1160 * is absurd, and defer to the Unicode join if appropriate.
1161 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001162 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001163 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001164 item = PySequence_Fast_GET_ITEM(seq, i);
1165 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001166#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001167 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001168 /* Defer to Unicode join.
1169 * CAUTION: There's no gurantee that the
1170 * original sequence can be iterated over
1171 * again, so we must pass seq here.
1172 */
1173 PyObject *result;
1174 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001175 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001176 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001177 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001178#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001179 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001180 "sequence item %i: expected string,"
1181 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001182 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001183 Py_DECREF(seq);
1184 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001185 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001186 sz += PyString_GET_SIZE(item);
1187 if (i != 0)
1188 sz += seplen;
1189 if (sz < old_sz || sz > INT_MAX) {
1190 PyErr_SetString(PyExc_OverflowError,
1191 "join() is too long for a Python string");
1192 Py_DECREF(seq);
1193 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001194 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001195 }
1196
1197 /* Allocate result space. */
1198 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1199 if (res == NULL) {
1200 Py_DECREF(seq);
1201 return NULL;
1202 }
1203
1204 /* Catenate everything. */
1205 p = PyString_AS_STRING(res);
1206 for (i = 0; i < seqlen; ++i) {
1207 size_t n;
1208 item = PySequence_Fast_GET_ITEM(seq, i);
1209 n = PyString_GET_SIZE(item);
1210 memcpy(p, PyString_AS_STRING(item), n);
1211 p += n;
1212 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001213 memcpy(p, sep, seplen);
1214 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001215 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001217
Jeremy Hylton49048292000-07-11 03:28:17 +00001218 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001220}
1221
Tim Peters52e155e2001-06-16 05:42:57 +00001222PyObject *
1223_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001224{
Tim Petersa7259592001-06-16 05:11:17 +00001225 assert(sep != NULL && PyString_Check(sep));
1226 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001227 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001228}
1229
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230static long
Fred Drakeba096332000-07-09 07:04:36 +00001231string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001232{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001233 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001234 int len = PyString_GET_SIZE(self);
1235 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001236 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001237
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001238 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001239 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001240 return -2;
1241 if (PyString_Check(subobj)) {
1242 sub = PyString_AS_STRING(subobj);
1243 n = PyString_GET_SIZE(subobj);
1244 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001245#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001246 else if (PyUnicode_Check(subobj))
1247 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001248#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001249 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001250 return -2;
1251
1252 if (last > len)
1253 last = len;
1254 if (last < 0)
1255 last += len;
1256 if (last < 0)
1257 last = 0;
1258 if (i < 0)
1259 i += len;
1260 if (i < 0)
1261 i = 0;
1262
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 if (dir > 0) {
1264 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001266 last -= n;
1267 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001268 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001269 return (long)i;
1270 }
1271 else {
1272 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001273
Guido van Rossum4c08d552000-03-10 22:55:18 +00001274 if (n == 0 && i <= last)
1275 return (long)last;
1276 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001277 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001278 return (long)j;
1279 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001280
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 return -1;
1282}
1283
1284
1285static char find__doc__[] =
1286"S.find(sub [,start [,end]]) -> int\n\
1287\n\
1288Return the lowest index in S where substring sub is found,\n\
1289such that sub is contained within s[start,end]. Optional\n\
1290arguments start and end are interpreted as in slice notation.\n\
1291\n\
1292Return -1 on failure.";
1293
1294static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001295string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001297 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001298 if (result == -2)
1299 return NULL;
1300 return PyInt_FromLong(result);
1301}
1302
1303
1304static char index__doc__[] =
1305"S.index(sub [,start [,end]]) -> int\n\
1306\n\
1307Like S.find() but raise ValueError when the substring is not found.";
1308
1309static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001310string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313 if (result == -2)
1314 return NULL;
1315 if (result == -1) {
1316 PyErr_SetString(PyExc_ValueError,
1317 "substring not found in string.index");
1318 return NULL;
1319 }
1320 return PyInt_FromLong(result);
1321}
1322
1323
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324static char rfind__doc__[] =
1325"S.rfind(sub [,start [,end]]) -> int\n\
1326\n\
1327Return the highest index in S where substring sub is found,\n\
1328such that sub is contained within s[start,end]. Optional\n\
1329arguments start and end are interpreted as in slice notation.\n\
1330\n\
1331Return -1 on failure.";
1332
1333static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001334string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337 if (result == -2)
1338 return NULL;
1339 return PyInt_FromLong(result);
1340}
1341
1342
1343static char rindex__doc__[] =
1344"S.rindex(sub [,start [,end]]) -> int\n\
1345\n\
1346Like S.rfind() but raise ValueError when the substring is not found.";
1347
1348static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001349string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001351 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352 if (result == -2)
1353 return NULL;
1354 if (result == -1) {
1355 PyErr_SetString(PyExc_ValueError,
1356 "substring not found in string.rindex");
1357 return NULL;
1358 }
1359 return PyInt_FromLong(result);
1360}
1361
1362
1363static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001364do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365{
1366 char *s = PyString_AS_STRING(self);
1367 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369 i = 0;
1370 if (striptype != RIGHTSTRIP) {
1371 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1372 i++;
1373 }
1374 }
1375
1376 j = len;
1377 if (striptype != LEFTSTRIP) {
1378 do {
1379 j--;
1380 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1381 j++;
1382 }
1383
Tim Peters8fa5dd02001-09-12 02:18:30 +00001384 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 Py_INCREF(self);
1386 return (PyObject*)self;
1387 }
1388 else
1389 return PyString_FromStringAndSize(s+i, j-i);
1390}
1391
1392
1393static char strip__doc__[] =
1394"S.strip() -> string\n\
1395\n\
1396Return a copy of the string S with leading and trailing\n\
1397whitespace removed.";
1398
1399static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001400string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001402 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403}
1404
1405
1406static char lstrip__doc__[] =
1407"S.lstrip() -> string\n\
1408\n\
1409Return a copy of the string S with leading whitespace removed.";
1410
1411static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001412string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001414 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415}
1416
1417
1418static char rstrip__doc__[] =
1419"S.rstrip() -> string\n\
1420\n\
1421Return a copy of the string S with trailing whitespace removed.";
1422
1423static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001424string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001426 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001427}
1428
1429
1430static char lower__doc__[] =
1431"S.lower() -> string\n\
1432\n\
1433Return a copy of the string S converted to lowercase.";
1434
1435static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001436string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437{
1438 char *s = PyString_AS_STRING(self), *s_new;
1439 int i, n = PyString_GET_SIZE(self);
1440 PyObject *new;
1441
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001442 new = PyString_FromStringAndSize(NULL, n);
1443 if (new == NULL)
1444 return NULL;
1445 s_new = PyString_AsString(new);
1446 for (i = 0; i < n; i++) {
1447 int c = Py_CHARMASK(*s++);
1448 if (isupper(c)) {
1449 *s_new = tolower(c);
1450 } else
1451 *s_new = c;
1452 s_new++;
1453 }
1454 return new;
1455}
1456
1457
1458static char upper__doc__[] =
1459"S.upper() -> string\n\
1460\n\
1461Return a copy of the string S converted to uppercase.";
1462
1463static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001464string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465{
1466 char *s = PyString_AS_STRING(self), *s_new;
1467 int i, n = PyString_GET_SIZE(self);
1468 PyObject *new;
1469
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 new = PyString_FromStringAndSize(NULL, n);
1471 if (new == NULL)
1472 return NULL;
1473 s_new = PyString_AsString(new);
1474 for (i = 0; i < n; i++) {
1475 int c = Py_CHARMASK(*s++);
1476 if (islower(c)) {
1477 *s_new = toupper(c);
1478 } else
1479 *s_new = c;
1480 s_new++;
1481 }
1482 return new;
1483}
1484
1485
Guido van Rossum4c08d552000-03-10 22:55:18 +00001486static char title__doc__[] =
1487"S.title() -> string\n\
1488\n\
1489Return a titlecased version of S, i.e. words start with uppercase\n\
1490characters, all remaining cased characters have lowercase.";
1491
1492static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001493string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001494{
1495 char *s = PyString_AS_STRING(self), *s_new;
1496 int i, n = PyString_GET_SIZE(self);
1497 int previous_is_cased = 0;
1498 PyObject *new;
1499
Guido van Rossum4c08d552000-03-10 22:55:18 +00001500 new = PyString_FromStringAndSize(NULL, n);
1501 if (new == NULL)
1502 return NULL;
1503 s_new = PyString_AsString(new);
1504 for (i = 0; i < n; i++) {
1505 int c = Py_CHARMASK(*s++);
1506 if (islower(c)) {
1507 if (!previous_is_cased)
1508 c = toupper(c);
1509 previous_is_cased = 1;
1510 } else if (isupper(c)) {
1511 if (previous_is_cased)
1512 c = tolower(c);
1513 previous_is_cased = 1;
1514 } else
1515 previous_is_cased = 0;
1516 *s_new++ = c;
1517 }
1518 return new;
1519}
1520
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001521static char capitalize__doc__[] =
1522"S.capitalize() -> string\n\
1523\n\
1524Return a copy of the string S with only its first character\n\
1525capitalized.";
1526
1527static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001528string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529{
1530 char *s = PyString_AS_STRING(self), *s_new;
1531 int i, n = PyString_GET_SIZE(self);
1532 PyObject *new;
1533
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534 new = PyString_FromStringAndSize(NULL, n);
1535 if (new == NULL)
1536 return NULL;
1537 s_new = PyString_AsString(new);
1538 if (0 < n) {
1539 int c = Py_CHARMASK(*s++);
1540 if (islower(c))
1541 *s_new = toupper(c);
1542 else
1543 *s_new = c;
1544 s_new++;
1545 }
1546 for (i = 1; i < n; i++) {
1547 int c = Py_CHARMASK(*s++);
1548 if (isupper(c))
1549 *s_new = tolower(c);
1550 else
1551 *s_new = c;
1552 s_new++;
1553 }
1554 return new;
1555}
1556
1557
1558static char count__doc__[] =
1559"S.count(sub[, start[, end]]) -> int\n\
1560\n\
1561Return the number of occurrences of substring sub in string\n\
1562S[start:end]. Optional arguments start and end are\n\
1563interpreted as in slice notation.";
1564
1565static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001566string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001568 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569 int len = PyString_GET_SIZE(self), n;
1570 int i = 0, last = INT_MAX;
1571 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573
Guido van Rossumc6821402000-05-08 14:08:05 +00001574 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1575 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001577
Guido van Rossum4c08d552000-03-10 22:55:18 +00001578 if (PyString_Check(subobj)) {
1579 sub = PyString_AS_STRING(subobj);
1580 n = PyString_GET_SIZE(subobj);
1581 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001582#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001583 else if (PyUnicode_Check(subobj)) {
1584 int count;
1585 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1586 if (count == -1)
1587 return NULL;
1588 else
1589 return PyInt_FromLong((long) count);
1590 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001591#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001592 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1593 return NULL;
1594
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595 if (last > len)
1596 last = len;
1597 if (last < 0)
1598 last += len;
1599 if (last < 0)
1600 last = 0;
1601 if (i < 0)
1602 i += len;
1603 if (i < 0)
1604 i = 0;
1605 m = last + 1 - n;
1606 if (n == 0)
1607 return PyInt_FromLong((long) (m-i));
1608
1609 r = 0;
1610 while (i < m) {
1611 if (!memcmp(s+i, sub, n)) {
1612 r++;
1613 i += n;
1614 } else {
1615 i++;
1616 }
1617 }
1618 return PyInt_FromLong((long) r);
1619}
1620
1621
1622static char swapcase__doc__[] =
1623"S.swapcase() -> string\n\
1624\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001625Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626converted to lowercase and vice versa.";
1627
1628static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001629string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630{
1631 char *s = PyString_AS_STRING(self), *s_new;
1632 int i, n = PyString_GET_SIZE(self);
1633 PyObject *new;
1634
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 new = PyString_FromStringAndSize(NULL, n);
1636 if (new == NULL)
1637 return NULL;
1638 s_new = PyString_AsString(new);
1639 for (i = 0; i < n; i++) {
1640 int c = Py_CHARMASK(*s++);
1641 if (islower(c)) {
1642 *s_new = toupper(c);
1643 }
1644 else if (isupper(c)) {
1645 *s_new = tolower(c);
1646 }
1647 else
1648 *s_new = c;
1649 s_new++;
1650 }
1651 return new;
1652}
1653
1654
1655static char translate__doc__[] =
1656"S.translate(table [,deletechars]) -> string\n\
1657\n\
1658Return a copy of the string S, where all characters occurring\n\
1659in the optional argument deletechars are removed, and the\n\
1660remaining characters have been mapped through the given\n\
1661translation table, which must be a string of length 256.";
1662
1663static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001664string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001666 register char *input, *output;
1667 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 register int i, c, changed = 0;
1669 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001670 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671 int inlen, tablen, dellen = 0;
1672 PyObject *result;
1673 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675
Guido van Rossum4c08d552000-03-10 22:55:18 +00001676 if (!PyArg_ParseTuple(args, "O|O:translate",
1677 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679
1680 if (PyString_Check(tableobj)) {
1681 table1 = PyString_AS_STRING(tableobj);
1682 tablen = PyString_GET_SIZE(tableobj);
1683 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001684#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001685 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001686 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001687 parameter; instead a mapping to None will cause characters
1688 to be deleted. */
1689 if (delobj != NULL) {
1690 PyErr_SetString(PyExc_TypeError,
1691 "deletions are implemented differently for unicode");
1692 return NULL;
1693 }
1694 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1695 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001696#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001697 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001699
1700 if (delobj != NULL) {
1701 if (PyString_Check(delobj)) {
1702 del_table = PyString_AS_STRING(delobj);
1703 dellen = PyString_GET_SIZE(delobj);
1704 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001705#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001706 else if (PyUnicode_Check(delobj)) {
1707 PyErr_SetString(PyExc_TypeError,
1708 "deletions are implemented differently for unicode");
1709 return NULL;
1710 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001711#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1713 return NULL;
1714
1715 if (tablen != 256) {
1716 PyErr_SetString(PyExc_ValueError,
1717 "translation table must be 256 characters long");
1718 return NULL;
1719 }
1720 }
1721 else {
1722 del_table = NULL;
1723 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 }
1725
1726 table = table1;
1727 inlen = PyString_Size(input_obj);
1728 result = PyString_FromStringAndSize((char *)NULL, inlen);
1729 if (result == NULL)
1730 return NULL;
1731 output_start = output = PyString_AsString(result);
1732 input = PyString_AsString(input_obj);
1733
1734 if (dellen == 0) {
1735 /* If no deletions are required, use faster code */
1736 for (i = inlen; --i >= 0; ) {
1737 c = Py_CHARMASK(*input++);
1738 if (Py_CHARMASK((*output++ = table[c])) != c)
1739 changed = 1;
1740 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001741 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 return result;
1743 Py_DECREF(result);
1744 Py_INCREF(input_obj);
1745 return input_obj;
1746 }
1747
1748 for (i = 0; i < 256; i++)
1749 trans_table[i] = Py_CHARMASK(table[i]);
1750
1751 for (i = 0; i < dellen; i++)
1752 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1753
1754 for (i = inlen; --i >= 0; ) {
1755 c = Py_CHARMASK(*input++);
1756 if (trans_table[c] != -1)
1757 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1758 continue;
1759 changed = 1;
1760 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001761 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762 Py_DECREF(result);
1763 Py_INCREF(input_obj);
1764 return input_obj;
1765 }
1766 /* Fix the size of the resulting string */
1767 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1768 return NULL;
1769 return result;
1770}
1771
1772
1773/* What follows is used for implementing replace(). Perry Stoll. */
1774
/*
  mymemfind

  A strstr() work-alike for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match within MEM, or -1 if
  there is none.  A pattern longer than MEM can never match, so that
  case also returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* a match cannot begin inside the last pat_len-1 bytes */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* compare the first byte before paying for a full memcmp() */
        if (mem[pos] == pat[0] && !memcmp(mem + pos, pat, pat_len))
            return pos;
        pos++;
    }
    return -1;
}
1800
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  left to right and resuming just past each match.  For example
  mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at;

    while (len >= 0 && (at = mymemfind(mem, len, pat, pat_len)) != -1) {
        /* skip the unmatched prefix together with the match itself */
        const int consumed = at + pat_len;

        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1824
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "replace all".

   If STR is empty, the length of PAT is greater than the length of STR,
   or there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here with PyMem_MALLOC and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.

   PAT_LEN is expected to be greater than zero; the search helper reads
   pat[0] unconditionally.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Only a growing replacement can overflow: when sub_len <= pat_len
       the result is no longer than the input.  Refuse sizes that do not
       fit in an int instead of letting the arithmetic wrap and the
       copy loop below overrun an undersized buffer. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1909
1910
1911static char replace__doc__[] =
1912"S.replace (old, new[, maxsplit]) -> string\n\
1913\n\
1914Return a copy of string S with all occurrences of substring\n\
1915old replaced by new. If the optional argument maxsplit is\n\
1916given, only the first maxsplit occurrences are replaced.";
1917
1918static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001919string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001921 const char *str = PyString_AS_STRING(self), *sub, *repl;
1922 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001923 const int len = PyString_GET_SIZE(self);
1924 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001927 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928
Guido van Rossum4c08d552000-03-10 22:55:18 +00001929 if (!PyArg_ParseTuple(args, "OO|i:replace",
1930 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001932
1933 if (PyString_Check(subobj)) {
1934 sub = PyString_AS_STRING(subobj);
1935 sub_len = PyString_GET_SIZE(subobj);
1936 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001937#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001938 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001939 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001940 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001941#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001942 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1943 return NULL;
1944
1945 if (PyString_Check(replobj)) {
1946 repl = PyString_AS_STRING(replobj);
1947 repl_len = PyString_GET_SIZE(replobj);
1948 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001949#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001950 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001951 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001952 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001953#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001954 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1955 return NULL;
1956
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001957 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001958 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 return NULL;
1960 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001961 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962 if (new_s == NULL) {
1963 PyErr_NoMemory();
1964 return NULL;
1965 }
1966 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001967 if (PyString_CheckExact(self)) {
1968 /* we're returning another reference to self */
1969 new = (PyObject*)self;
1970 Py_INCREF(new);
1971 }
1972 else {
1973 new = PyString_FromStringAndSize(str, len);
1974 if (new == NULL)
1975 return NULL;
1976 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977 }
1978 else {
1979 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001980 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981 }
1982 return new;
1983}
1984
1985
1986static char startswith__doc__[] =
1987"S.startswith(prefix[, start[, end]]) -> int\n\
1988\n\
1989Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1990optional start, test S beginning at that position. With optional end, stop\n\
1991comparing S at that position.";
1992
1993static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001994string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001996 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001998 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999 int plen;
2000 int start = 0;
2001 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002002 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003
Guido van Rossumc6821402000-05-08 14:08:05 +00002004 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2005 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002006 return NULL;
2007 if (PyString_Check(subobj)) {
2008 prefix = PyString_AS_STRING(subobj);
2009 plen = PyString_GET_SIZE(subobj);
2010 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002011#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002012 else if (PyUnicode_Check(subobj)) {
2013 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002014 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002015 subobj, start, end, -1);
2016 if (rc == -1)
2017 return NULL;
2018 else
2019 return PyInt_FromLong((long) rc);
2020 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002021#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002022 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023 return NULL;
2024
2025 /* adopt Java semantics for index out of range. it is legal for
2026 * offset to be == plen, but this only returns true if prefix is
2027 * the empty string.
2028 */
2029 if (start < 0 || start+plen > len)
2030 return PyInt_FromLong(0);
2031
2032 if (!memcmp(str+start, prefix, plen)) {
2033 /* did the match end after the specified end? */
2034 if (end < 0)
2035 return PyInt_FromLong(1);
2036 else if (end - start < plen)
2037 return PyInt_FromLong(0);
2038 else
2039 return PyInt_FromLong(1);
2040 }
2041 else return PyInt_FromLong(0);
2042}
2043
2044
2045static char endswith__doc__[] =
2046"S.endswith(suffix[, start[, end]]) -> int\n\
2047\n\
2048Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2049optional start, test S beginning at that position. With optional end, stop\n\
2050comparing S at that position.";
2051
2052static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002053string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002055 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002056 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057 const char* suffix;
2058 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002059 int start = 0;
2060 int end = -1;
2061 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002062 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063
Guido van Rossumc6821402000-05-08 14:08:05 +00002064 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2065 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002066 return NULL;
2067 if (PyString_Check(subobj)) {
2068 suffix = PyString_AS_STRING(subobj);
2069 slen = PyString_GET_SIZE(subobj);
2070 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002071#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002072 else if (PyUnicode_Check(subobj)) {
2073 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002074 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002075 subobj, start, end, +1);
2076 if (rc == -1)
2077 return NULL;
2078 else
2079 return PyInt_FromLong((long) rc);
2080 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002081#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002082 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083 return NULL;
2084
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086 return PyInt_FromLong(0);
2087
2088 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090
Guido van Rossum4c08d552000-03-10 22:55:18 +00002091 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092 return PyInt_FromLong(1);
2093 else return PyInt_FromLong(0);
2094}
2095
2096
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002097static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002098"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002099\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002100Encodes S using the codec registered for encoding. encoding defaults\n\
2101to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002102handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2103a ValueError. Other possible values are 'ignore' and 'replace'.";
2104
2105static PyObject *
2106string_encode(PyStringObject *self, PyObject *args)
2107{
2108 char *encoding = NULL;
2109 char *errors = NULL;
2110 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2111 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002112 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2113}
2114
2115
2116static char decode__doc__[] =
2117"S.decode([encoding[,errors]]) -> object\n\
2118\n\
2119Decodes S using the codec registered for encoding. encoding defaults\n\
2120to the default encoding. errors may be given to set a different error\n\
2121handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2122a ValueError. Other possible values are 'ignore' and 'replace'.";
2123
2124static PyObject *
2125string_decode(PyStringObject *self, PyObject *args)
2126{
2127 char *encoding = NULL;
2128 char *errors = NULL;
2129 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2130 return NULL;
2131 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002132}
2133
2134
Guido van Rossum4c08d552000-03-10 22:55:18 +00002135static char expandtabs__doc__[] =
2136"S.expandtabs([tabsize]) -> string\n\
2137\n\
2138Return a copy of S where all tab characters are expanded using spaces.\n\
2139If tabsize is not given, a tab size of 8 characters is assumed.";
2140
2141static PyObject*
2142string_expandtabs(PyStringObject *self, PyObject *args)
2143{
2144 const char *e, *p;
2145 char *q;
2146 int i, j;
2147 PyObject *u;
2148 int tabsize = 8;
2149
2150 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2151 return NULL;
2152
Thomas Wouters7e474022000-07-16 12:04:32 +00002153 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002154 i = j = 0;
2155 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2156 for (p = PyString_AS_STRING(self); p < e; p++)
2157 if (*p == '\t') {
2158 if (tabsize > 0)
2159 j += tabsize - (j % tabsize);
2160 }
2161 else {
2162 j++;
2163 if (*p == '\n' || *p == '\r') {
2164 i += j;
2165 j = 0;
2166 }
2167 }
2168
2169 /* Second pass: create output string and fill it */
2170 u = PyString_FromStringAndSize(NULL, i + j);
2171 if (!u)
2172 return NULL;
2173
2174 j = 0;
2175 q = PyString_AS_STRING(u);
2176
2177 for (p = PyString_AS_STRING(self); p < e; p++)
2178 if (*p == '\t') {
2179 if (tabsize > 0) {
2180 i = tabsize - (j % tabsize);
2181 j += i;
2182 while (i--)
2183 *q++ = ' ';
2184 }
2185 }
2186 else {
2187 j++;
2188 *q++ = *p;
2189 if (*p == '\n' || *p == '\r')
2190 j = 0;
2191 }
2192
2193 return u;
2194}
2195
Tim Peters8fa5dd02001-09-12 02:18:30 +00002196static PyObject *
2197pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002198{
2199 PyObject *u;
2200
2201 if (left < 0)
2202 left = 0;
2203 if (right < 0)
2204 right = 0;
2205
Tim Peters8fa5dd02001-09-12 02:18:30 +00002206 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207 Py_INCREF(self);
2208 return (PyObject *)self;
2209 }
2210
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002211 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002212 left + PyString_GET_SIZE(self) + right);
2213 if (u) {
2214 if (left)
2215 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002216 memcpy(PyString_AS_STRING(u) + left,
2217 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002218 PyString_GET_SIZE(self));
2219 if (right)
2220 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2221 fill, right);
2222 }
2223
2224 return u;
2225}
2226
2227static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002228"S.ljust(width) -> string\n"
2229"\n"
2230"Return S left justified in a string of length width. Padding is\n"
2231"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232
2233static PyObject *
2234string_ljust(PyStringObject *self, PyObject *args)
2235{
2236 int width;
2237 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2238 return NULL;
2239
Tim Peters8fa5dd02001-09-12 02:18:30 +00002240 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241 Py_INCREF(self);
2242 return (PyObject*) self;
2243 }
2244
2245 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2246}
2247
2248
2249static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002250"S.rjust(width) -> string\n"
2251"\n"
2252"Return S right justified in a string of length width. Padding is\n"
2253"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254
2255static PyObject *
2256string_rjust(PyStringObject *self, PyObject *args)
2257{
2258 int width;
2259 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2260 return NULL;
2261
Tim Peters8fa5dd02001-09-12 02:18:30 +00002262 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263 Py_INCREF(self);
2264 return (PyObject*) self;
2265 }
2266
2267 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2268}
2269
2270
2271static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002272"S.center(width) -> string\n"
2273"\n"
2274"Return S centered in a string of length width. Padding is done\n"
2275"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276
2277static PyObject *
2278string_center(PyStringObject *self, PyObject *args)
2279{
2280 int marg, left;
2281 int width;
2282
2283 if (!PyArg_ParseTuple(args, "i:center", &width))
2284 return NULL;
2285
Tim Peters8fa5dd02001-09-12 02:18:30 +00002286 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 Py_INCREF(self);
2288 return (PyObject*) self;
2289 }
2290
2291 marg = width - PyString_GET_SIZE(self);
2292 left = marg / 2 + (marg & width & 1);
2293
2294 return pad(self, left, marg - left, ' ');
2295}
2296
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002298"S.isspace() -> int\n"
2299"\n"
2300"Return 1 if there are only whitespace characters in S,\n"
2301"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302
2303static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002304string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305{
Fred Drakeba096332000-07-09 07:04:36 +00002306 register const unsigned char *p
2307 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002308 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309
Guido van Rossum4c08d552000-03-10 22:55:18 +00002310 /* Shortcut for single character strings */
2311 if (PyString_GET_SIZE(self) == 1 &&
2312 isspace(*p))
2313 return PyInt_FromLong(1);
2314
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002315 /* Special case for empty strings */
2316 if (PyString_GET_SIZE(self) == 0)
2317 return PyInt_FromLong(0);
2318
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319 e = p + PyString_GET_SIZE(self);
2320 for (; p < e; p++) {
2321 if (!isspace(*p))
2322 return PyInt_FromLong(0);
2323 }
2324 return PyInt_FromLong(1);
2325}
2326
2327
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002328static char isalpha__doc__[] =
2329"S.isalpha() -> int\n\
2330\n\
2331Return 1 if all characters in S are alphabetic\n\
2332and there is at least one character in S, 0 otherwise.";
2333
2334static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002335string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002336{
Fred Drakeba096332000-07-09 07:04:36 +00002337 register const unsigned char *p
2338 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002339 register const unsigned char *e;
2340
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002341 /* Shortcut for single character strings */
2342 if (PyString_GET_SIZE(self) == 1 &&
2343 isalpha(*p))
2344 return PyInt_FromLong(1);
2345
2346 /* Special case for empty strings */
2347 if (PyString_GET_SIZE(self) == 0)
2348 return PyInt_FromLong(0);
2349
2350 e = p + PyString_GET_SIZE(self);
2351 for (; p < e; p++) {
2352 if (!isalpha(*p))
2353 return PyInt_FromLong(0);
2354 }
2355 return PyInt_FromLong(1);
2356}
2357
2358
2359static char isalnum__doc__[] =
2360"S.isalnum() -> int\n\
2361\n\
2362Return 1 if all characters in S are alphanumeric\n\
2363and there is at least one character in S, 0 otherwise.";
2364
2365static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002366string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002367{
Fred Drakeba096332000-07-09 07:04:36 +00002368 register const unsigned char *p
2369 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002370 register const unsigned char *e;
2371
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002372 /* Shortcut for single character strings */
2373 if (PyString_GET_SIZE(self) == 1 &&
2374 isalnum(*p))
2375 return PyInt_FromLong(1);
2376
2377 /* Special case for empty strings */
2378 if (PyString_GET_SIZE(self) == 0)
2379 return PyInt_FromLong(0);
2380
2381 e = p + PyString_GET_SIZE(self);
2382 for (; p < e; p++) {
2383 if (!isalnum(*p))
2384 return PyInt_FromLong(0);
2385 }
2386 return PyInt_FromLong(1);
2387}
2388
2389
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390static char isdigit__doc__[] =
2391"S.isdigit() -> int\n\
2392\n\
2393Return 1 if there are only digit characters in S,\n\
23940 otherwise.";
2395
2396static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002397string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398{
Fred Drakeba096332000-07-09 07:04:36 +00002399 register const unsigned char *p
2400 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002401 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402
Guido van Rossum4c08d552000-03-10 22:55:18 +00002403 /* Shortcut for single character strings */
2404 if (PyString_GET_SIZE(self) == 1 &&
2405 isdigit(*p))
2406 return PyInt_FromLong(1);
2407
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002408 /* Special case for empty strings */
2409 if (PyString_GET_SIZE(self) == 0)
2410 return PyInt_FromLong(0);
2411
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412 e = p + PyString_GET_SIZE(self);
2413 for (; p < e; p++) {
2414 if (!isdigit(*p))
2415 return PyInt_FromLong(0);
2416 }
2417 return PyInt_FromLong(1);
2418}
2419
2420
2421static char islower__doc__[] =
2422"S.islower() -> int\n\
2423\n\
2424Return 1 if all cased characters in S are lowercase and there is\n\
2425at least one cased character in S, 0 otherwise.";
2426
2427static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002428string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429{
Fred Drakeba096332000-07-09 07:04:36 +00002430 register const unsigned char *p
2431 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002432 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 int cased;
2434
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 /* Shortcut for single character strings */
2436 if (PyString_GET_SIZE(self) == 1)
2437 return PyInt_FromLong(islower(*p) != 0);
2438
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002439 /* Special case for empty strings */
2440 if (PyString_GET_SIZE(self) == 0)
2441 return PyInt_FromLong(0);
2442
Guido van Rossum4c08d552000-03-10 22:55:18 +00002443 e = p + PyString_GET_SIZE(self);
2444 cased = 0;
2445 for (; p < e; p++) {
2446 if (isupper(*p))
2447 return PyInt_FromLong(0);
2448 else if (!cased && islower(*p))
2449 cased = 1;
2450 }
2451 return PyInt_FromLong(cased);
2452}
2453
2454
2455static char isupper__doc__[] =
2456"S.isupper() -> int\n\
2457\n\
2458Return 1 if all cased characters in S are uppercase and there is\n\
2459at least one cased character in S, 0 otherwise.";
2460
2461static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002462string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002463{
Fred Drakeba096332000-07-09 07:04:36 +00002464 register const unsigned char *p
2465 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002466 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002467 int cased;
2468
Guido van Rossum4c08d552000-03-10 22:55:18 +00002469 /* Shortcut for single character strings */
2470 if (PyString_GET_SIZE(self) == 1)
2471 return PyInt_FromLong(isupper(*p) != 0);
2472
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002473 /* Special case for empty strings */
2474 if (PyString_GET_SIZE(self) == 0)
2475 return PyInt_FromLong(0);
2476
Guido van Rossum4c08d552000-03-10 22:55:18 +00002477 e = p + PyString_GET_SIZE(self);
2478 cased = 0;
2479 for (; p < e; p++) {
2480 if (islower(*p))
2481 return PyInt_FromLong(0);
2482 else if (!cased && isupper(*p))
2483 cased = 1;
2484 }
2485 return PyInt_FromLong(cased);
2486}
2487
2488
2489static char istitle__doc__[] =
2490"S.istitle() -> int\n\
2491\n\
2492Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2493may only follow uncased characters and lowercase characters only cased\n\
2494ones. Return 0 otherwise.";
2495
2496static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002497string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002498{
Fred Drakeba096332000-07-09 07:04:36 +00002499 register const unsigned char *p
2500 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002501 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002502 int cased, previous_is_cased;
2503
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504 /* Shortcut for single character strings */
2505 if (PyString_GET_SIZE(self) == 1)
2506 return PyInt_FromLong(isupper(*p) != 0);
2507
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002508 /* Special case for empty strings */
2509 if (PyString_GET_SIZE(self) == 0)
2510 return PyInt_FromLong(0);
2511
Guido van Rossum4c08d552000-03-10 22:55:18 +00002512 e = p + PyString_GET_SIZE(self);
2513 cased = 0;
2514 previous_is_cased = 0;
2515 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002516 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517
2518 if (isupper(ch)) {
2519 if (previous_is_cased)
2520 return PyInt_FromLong(0);
2521 previous_is_cased = 1;
2522 cased = 1;
2523 }
2524 else if (islower(ch)) {
2525 if (!previous_is_cased)
2526 return PyInt_FromLong(0);
2527 previous_is_cased = 1;
2528 cased = 1;
2529 }
2530 else
2531 previous_is_cased = 0;
2532 }
2533 return PyInt_FromLong(cased);
2534}
2535
2536
2537static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002538"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002539\n\
2540Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002541Line breaks are not included in the resulting list unless keepends\n\
2542is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002543
2544#define SPLIT_APPEND(data, left, right) \
2545 str = PyString_FromStringAndSize(data + left, right - left); \
2546 if (!str) \
2547 goto onError; \
2548 if (PyList_Append(list, str)) { \
2549 Py_DECREF(str); \
2550 goto onError; \
2551 } \
2552 else \
2553 Py_DECREF(str);
2554
2555static PyObject*
2556string_splitlines(PyStringObject *self, PyObject *args)
2557{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558 register int i;
2559 register int j;
2560 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002561 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002562 PyObject *list;
2563 PyObject *str;
2564 char *data;
2565
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002566 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 return NULL;
2568
2569 data = PyString_AS_STRING(self);
2570 len = PyString_GET_SIZE(self);
2571
Guido van Rossum4c08d552000-03-10 22:55:18 +00002572 list = PyList_New(0);
2573 if (!list)
2574 goto onError;
2575
2576 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002577 int eol;
2578
Guido van Rossum4c08d552000-03-10 22:55:18 +00002579 /* Find a line and append it */
2580 while (i < len && data[i] != '\n' && data[i] != '\r')
2581 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582
2583 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002584 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002585 if (i < len) {
2586 if (data[i] == '\r' && i + 1 < len &&
2587 data[i+1] == '\n')
2588 i += 2;
2589 else
2590 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002591 if (keepends)
2592 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002594 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595 j = i;
2596 }
2597 if (j < len) {
2598 SPLIT_APPEND(data, j, len);
2599 }
2600
2601 return list;
2602
2603 onError:
2604 Py_DECREF(list);
2605 return NULL;
2606}
2607
2608#undef SPLIT_APPEND
2609
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002611static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002612string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002613 /* Counterparts of the obsolete stropmodule functions; except
2614 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002615 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2616 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2617 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2618 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2619 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2620 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2621 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2622 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2623 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2624 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2625 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2626 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2627 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2628 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2629 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2630 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2631 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2632 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2633 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2634 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2635 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2636 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2637 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2638 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2639 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2640 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2641 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2642 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2643 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2644 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2645 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2646 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2647 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002649 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002650#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651 {NULL, NULL} /* sentinel */
2652};
2653
Guido van Rossumae960af2001-08-30 03:11:59 +00002654staticforward PyObject *
2655str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2656
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002657static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002658string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002659{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002660 PyObject *x = NULL;
2661 static char *kwlist[] = {"object", 0};
2662
Guido van Rossumae960af2001-08-30 03:11:59 +00002663 if (type != &PyString_Type)
2664 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002665 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2666 return NULL;
2667 if (x == NULL)
2668 return PyString_FromString("");
2669 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002670}
2671
Guido van Rossumae960af2001-08-30 03:11:59 +00002672static PyObject *
2673str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2674{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002675 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002676 int n;
2677
2678 assert(PyType_IsSubtype(type, &PyString_Type));
2679 tmp = string_new(&PyString_Type, args, kwds);
2680 if (tmp == NULL)
2681 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002682 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002683 n = PyString_GET_SIZE(tmp);
2684 pnew = type->tp_alloc(type, n);
2685 if (pnew != NULL) {
2686 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2687#ifdef CACHE_HASH
2688 ((PyStringObject *)pnew)->ob_shash =
2689 ((PyStringObject *)tmp)->ob_shash;
2690#endif
2691#ifdef INTERN_STRINGS
2692 ((PyStringObject *)pnew)->ob_sinterned =
2693 ((PyStringObject *)tmp)->ob_sinterned;
2694#endif
2695 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002696 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002697 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002698}
2699
Tim Peters6d6c1a32001-08-02 04:15:00 +00002700static char string_doc[] =
2701"str(object) -> string\n\
2702\n\
2703Return a nice string representation of the object.\n\
2704If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002705
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002706PyTypeObject PyString_Type = {
2707 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002708 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002709 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002710 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002711 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002712 (destructor)string_dealloc, /* tp_dealloc */
2713 (printfunc)string_print, /* tp_print */
2714 0, /* tp_getattr */
2715 0, /* tp_setattr */
2716 0, /* tp_compare */
2717 (reprfunc)string_repr, /* tp_repr */
2718 0, /* tp_as_number */
2719 &string_as_sequence, /* tp_as_sequence */
2720 0, /* tp_as_mapping */
2721 (hashfunc)string_hash, /* tp_hash */
2722 0, /* tp_call */
2723 (reprfunc)string_str, /* tp_str */
2724 PyObject_GenericGetAttr, /* tp_getattro */
2725 0, /* tp_setattro */
2726 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002727 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002728 string_doc, /* tp_doc */
2729 0, /* tp_traverse */
2730 0, /* tp_clear */
2731 (richcmpfunc)string_richcompare, /* tp_richcompare */
2732 0, /* tp_weaklistoffset */
2733 0, /* tp_iter */
2734 0, /* tp_iternext */
2735 string_methods, /* tp_methods */
2736 0, /* tp_members */
2737 0, /* tp_getset */
2738 0, /* tp_base */
2739 0, /* tp_dict */
2740 0, /* tp_descr_get */
2741 0, /* tp_descr_set */
2742 0, /* tp_dictoffset */
2743 0, /* tp_init */
2744 0, /* tp_alloc */
2745 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002746};
2747
2748void
Fred Drakeba096332000-07-09 07:04:36 +00002749PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002750{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002751 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002752 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002753 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002754 if (w == NULL || !PyString_Check(*pv)) {
2755 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002756 *pv = NULL;
2757 return;
2758 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002759 v = string_concat((PyStringObject *) *pv, w);
2760 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002761 *pv = v;
2762}
2763
Guido van Rossum013142a1994-08-30 08:19:36 +00002764void
Fred Drakeba096332000-07-09 07:04:36 +00002765PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002766{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002767 PyString_Concat(pv, w);
2768 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002769}
2770
2771
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002772/* The following function breaks the notion that strings are immutable:
2773 it changes the size of a string. We get away with this only if there
2774 is only one module referencing the object. You can also think of it
2775 as creating a new string object and destroying the old one, only
2776 more efficiently. In any case, don't use this if the string may
2777 already be known to some other part of the code... */
2778
2779int
Fred Drakeba096332000-07-09 07:04:36 +00002780_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002781{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002782 register PyObject *v;
2783 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002784 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002785 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002786 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002787 Py_DECREF(v);
2788 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002789 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002790 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002791 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002792#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002793 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002794#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 _Py_ForgetReference(v);
2796 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002797 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002799 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002800 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002801 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002802 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002804 _Py_NewReference(*pv);
2805 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002806 sv->ob_size = newsize;
2807 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002808 return 0;
2809}
Guido van Rossume5372401993-03-16 12:15:04 +00002810
2811/* Helpers for formatstring */
2812
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002813static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002814getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002815{
2816 int argidx = *p_argidx;
2817 if (argidx < arglen) {
2818 (*p_argidx)++;
2819 if (arglen < 0)
2820 return args;
2821 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002823 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 PyErr_SetString(PyExc_TypeError,
2825 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002826 return NULL;
2827}
2828
/* Format flag bits, one per printf-style flag character:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 * The values are distinct single bits so they can be OR-ed together.
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2841
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002842static int
Fred Drakeba096332000-07-09 07:04:36 +00002843formatfloat(char *buf, size_t buflen, int flags,
2844 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002845{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002846 /* fmt = '%#.' + `prec` + `type`
2847 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002848 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002849 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002850 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002851 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002852 if (prec < 0)
2853 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002854 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2855 type = 'g';
2856 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002857 /* worst case length calc to ensure no buffer overrun:
2858 fmt = %#.<prec>g
2859 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002860 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002861 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2862 If prec=0 the effective precision is 1 (the leading digit is
2863 always given), therefore increase by one to 10+prec. */
2864 if (buflen <= (size_t)10 + (size_t)prec) {
2865 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002866 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002867 return -1;
2868 }
Guido van Rossume5372401993-03-16 12:15:04 +00002869 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002870 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002871}
2872
Tim Peters38fd5b62000-09-21 05:43:11 +00002873/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2874 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2875 * Python's regular ints.
2876 * Return value: a new PyString*, or NULL if error.
2877 * . *pbuf is set to point into it,
2878 * *plen set to the # of chars following that.
2879 * Caller must decref it when done using pbuf.
2880 * The string starting at *pbuf is of the form
2881 * "-"? ("0x" | "0X")? digit+
2882 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002883 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002884 * There will be at least prec digits, zero-filled on the left if
2885 * necessary to get that many.
2886 * val object to be converted
2887 * flags bitmask of format flags; only F_ALT is looked at
2888 * prec minimum number of digits; 0-fill on left if needed
2889 * type a character in [duoxX]; u acts the same as d
2890 *
2891 * CAUTION: o, x and X conversions on regular ints can never
2892 * produce a '-' sign, but can for Python's unbounded ints.
2893 */
2894PyObject*
2895_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2896 char **pbuf, int *plen)
2897{
2898 PyObject *result = NULL;
2899 char *buf;
2900 int i;
2901 int sign; /* 1 if '-', else 0 */
2902 int len; /* number of characters */
2903 int numdigits; /* len == numnondigits + numdigits */
2904 int numnondigits = 0;
2905
2906 switch (type) {
2907 case 'd':
2908 case 'u':
2909 result = val->ob_type->tp_str(val);
2910 break;
2911 case 'o':
2912 result = val->ob_type->tp_as_number->nb_oct(val);
2913 break;
2914 case 'x':
2915 case 'X':
2916 numnondigits = 2;
2917 result = val->ob_type->tp_as_number->nb_hex(val);
2918 break;
2919 default:
2920 assert(!"'type' not in [duoxX]");
2921 }
2922 if (!result)
2923 return NULL;
2924
2925 /* To modify the string in-place, there can only be one reference. */
2926 if (result->ob_refcnt != 1) {
2927 PyErr_BadInternalCall();
2928 return NULL;
2929 }
2930 buf = PyString_AsString(result);
2931 len = PyString_Size(result);
2932 if (buf[len-1] == 'L') {
2933 --len;
2934 buf[len] = '\0';
2935 }
2936 sign = buf[0] == '-';
2937 numnondigits += sign;
2938 numdigits = len - numnondigits;
2939 assert(numdigits > 0);
2940
Tim Petersfff53252001-04-12 18:38:48 +00002941 /* Get rid of base marker unless F_ALT */
2942 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002943 /* Need to skip 0x, 0X or 0. */
2944 int skipped = 0;
2945 switch (type) {
2946 case 'o':
2947 assert(buf[sign] == '0');
2948 /* If 0 is only digit, leave it alone. */
2949 if (numdigits > 1) {
2950 skipped = 1;
2951 --numdigits;
2952 }
2953 break;
2954 case 'x':
2955 case 'X':
2956 assert(buf[sign] == '0');
2957 assert(buf[sign + 1] == 'x');
2958 skipped = 2;
2959 numnondigits -= 2;
2960 break;
2961 }
2962 if (skipped) {
2963 buf += skipped;
2964 len -= skipped;
2965 if (sign)
2966 buf[0] = '-';
2967 }
2968 assert(len == numnondigits + numdigits);
2969 assert(numdigits > 0);
2970 }
2971
2972 /* Fill with leading zeroes to meet minimum width. */
2973 if (prec > numdigits) {
2974 PyObject *r1 = PyString_FromStringAndSize(NULL,
2975 numnondigits + prec);
2976 char *b1;
2977 if (!r1) {
2978 Py_DECREF(result);
2979 return NULL;
2980 }
2981 b1 = PyString_AS_STRING(r1);
2982 for (i = 0; i < numnondigits; ++i)
2983 *b1++ = *buf++;
2984 for (i = 0; i < prec - numdigits; i++)
2985 *b1++ = '0';
2986 for (i = 0; i < numdigits; i++)
2987 *b1++ = *buf++;
2988 *b1 = '\0';
2989 Py_DECREF(result);
2990 result = r1;
2991 buf = PyString_AS_STRING(result);
2992 len = numnondigits + prec;
2993 }
2994
2995 /* Fix up case for hex conversions. */
2996 switch (type) {
2997 case 'x':
2998 /* Need to convert all upper case letters to lower case. */
2999 for (i = 0; i < len; i++)
3000 if (buf[i] >= 'A' && buf[i] <= 'F')
3001 buf[i] += 'a'-'A';
3002 break;
3003 case 'X':
3004 /* Need to convert 0x to 0X (and -0x to -0X). */
3005 if (buf[sign + 1] == 'x')
3006 buf[sign + 1] = 'X';
3007 break;
3008 }
3009 *pbuf = buf;
3010 *plen = len;
3011 return result;
3012}
3013
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003014static int
Fred Drakeba096332000-07-09 07:04:36 +00003015formatint(char *buf, size_t buflen, int flags,
3016 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003017{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003018 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003019 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3020 + 1 + 1 = 24 */
3021 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003022 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003023 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003024 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003025 if (prec < 0)
3026 prec = 1;
3027 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003028 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003029 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003030 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003031 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003032 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003033 return -1;
3034 }
Guido van Rossume5372401993-03-16 12:15:04 +00003035 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003036 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3037 * but we want it (for consistency with other %#x conversions, and
3038 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003039 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3040 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3041 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003042 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003043 if (x == 0 &&
3044 (flags & F_ALT) &&
3045 (type == 'x' || type == 'X') &&
3046 buf[1] != (char)type) /* this last always true under std C */
3047 {
Tim Petersfff53252001-04-12 18:38:48 +00003048 memmove(buf+2, buf, strlen(buf) + 1);
3049 buf[0] = '0';
3050 buf[1] = (char)type;
3051 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003052 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003053}
3054
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003055static int
Fred Drakeba096332000-07-09 07:04:36 +00003056formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003057{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003058 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003059 if (PyString_Check(v)) {
3060 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003061 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003062 }
3063 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003064 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003065 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003066 }
3067 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003068 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003069}
3070
Guido van Rossum013142a1994-08-30 08:19:36 +00003071
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003072/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3073
3074 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3075 chars are formatted. XXX This is a magic number. Each formatting
3076 routine does bounds checking to ensure no overflow, but a better
3077 solution may be to malloc a buffer of appropriate size for each
3078 format. For now, the current solution is sufficient.
3079*/
3080#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003081
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003082PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003083PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003084{
3085 char *fmt, *res;
3086 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003087 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003088 PyObject *result, *orig_args;
3089#ifdef Py_USING_UNICODE
3090 PyObject *v, *w;
3091#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003092 PyObject *dict = NULL;
3093 if (format == NULL || !PyString_Check(format) || args == NULL) {
3094 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003095 return NULL;
3096 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003097 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003098 fmt = PyString_AsString(format);
3099 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003100 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003101 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003102 if (result == NULL)
3103 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003104 res = PyString_AsString(result);
3105 if (PyTuple_Check(args)) {
3106 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003107 argidx = 0;
3108 }
3109 else {
3110 arglen = -1;
3111 argidx = -2;
3112 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003113 if (args->ob_type->tp_as_mapping)
3114 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003115 while (--fmtcnt >= 0) {
3116 if (*fmt != '%') {
3117 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003118 rescnt = fmtcnt + 100;
3119 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003120 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003121 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003122 res = PyString_AsString(result)
3123 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003124 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003125 }
3126 *res++ = *fmt++;
3127 }
3128 else {
3129 /* Got a format specifier */
3130 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003131 int width = -1;
3132 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003133 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003134 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003135 PyObject *v = NULL;
3136 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003137 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003138 int sign;
3139 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003140 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003141#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003142 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003143 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003144#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003145
Guido van Rossumda9c2711996-12-05 21:58:58 +00003146 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003147 if (*fmt == '(') {
3148 char *keystart;
3149 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003150 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003151 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003152
3153 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003154 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003155 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003156 goto error;
3157 }
3158 ++fmt;
3159 --fmtcnt;
3160 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003161 /* Skip over balanced parentheses */
3162 while (pcount > 0 && --fmtcnt >= 0) {
3163 if (*fmt == ')')
3164 --pcount;
3165 else if (*fmt == '(')
3166 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003167 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003168 }
3169 keylen = fmt - keystart - 1;
3170 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003171 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003172 "incomplete format key");
3173 goto error;
3174 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003175 key = PyString_FromStringAndSize(keystart,
3176 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003177 if (key == NULL)
3178 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003179 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003180 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003181 args_owned = 0;
3182 }
3183 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003184 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003185 if (args == NULL) {
3186 goto error;
3187 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003188 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003189 arglen = -1;
3190 argidx = -2;
3191 }
Guido van Rossume5372401993-03-16 12:15:04 +00003192 while (--fmtcnt >= 0) {
3193 switch (c = *fmt++) {
3194 case '-': flags |= F_LJUST; continue;
3195 case '+': flags |= F_SIGN; continue;
3196 case ' ': flags |= F_BLANK; continue;
3197 case '#': flags |= F_ALT; continue;
3198 case '0': flags |= F_ZERO; continue;
3199 }
3200 break;
3201 }
3202 if (c == '*') {
3203 v = getnextarg(args, arglen, &argidx);
3204 if (v == NULL)
3205 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003206 if (!PyInt_Check(v)) {
3207 PyErr_SetString(PyExc_TypeError,
3208 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003209 goto error;
3210 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003211 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003212 if (width < 0) {
3213 flags |= F_LJUST;
3214 width = -width;
3215 }
Guido van Rossume5372401993-03-16 12:15:04 +00003216 if (--fmtcnt >= 0)
3217 c = *fmt++;
3218 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003219 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003220 width = c - '0';
3221 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003222 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003223 if (!isdigit(c))
3224 break;
3225 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003226 PyErr_SetString(
3227 PyExc_ValueError,
3228 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003229 goto error;
3230 }
3231 width = width*10 + (c - '0');
3232 }
3233 }
3234 if (c == '.') {
3235 prec = 0;
3236 if (--fmtcnt >= 0)
3237 c = *fmt++;
3238 if (c == '*') {
3239 v = getnextarg(args, arglen, &argidx);
3240 if (v == NULL)
3241 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003242 if (!PyInt_Check(v)) {
3243 PyErr_SetString(
3244 PyExc_TypeError,
3245 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003246 goto error;
3247 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003248 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003249 if (prec < 0)
3250 prec = 0;
3251 if (--fmtcnt >= 0)
3252 c = *fmt++;
3253 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003254 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003255 prec = c - '0';
3256 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003257 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003258 if (!isdigit(c))
3259 break;
3260 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003261 PyErr_SetString(
3262 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003263 "prec too big");
3264 goto error;
3265 }
3266 prec = prec*10 + (c - '0');
3267 }
3268 }
3269 } /* prec */
3270 if (fmtcnt >= 0) {
3271 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003272 if (--fmtcnt >= 0)
3273 c = *fmt++;
3274 }
3275 }
3276 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003277 PyErr_SetString(PyExc_ValueError,
3278 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003279 goto error;
3280 }
3281 if (c != '%') {
3282 v = getnextarg(args, arglen, &argidx);
3283 if (v == NULL)
3284 goto error;
3285 }
3286 sign = 0;
3287 fill = ' ';
3288 switch (c) {
3289 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003290 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003291 len = 1;
3292 break;
3293 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003294 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003295#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003296 if (PyUnicode_Check(v)) {
3297 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003298 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003299 goto unicode;
3300 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003301#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003302 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003304 else
3305 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003306 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003307 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003308 if (!PyString_Check(temp)) {
3309 PyErr_SetString(PyExc_TypeError,
3310 "%s argument has non-string str()");
3311 goto error;
3312 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003313 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003314 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003315 if (prec >= 0 && len > prec)
3316 len = prec;
3317 break;
3318 case 'i':
3319 case 'd':
3320 case 'u':
3321 case 'o':
3322 case 'x':
3323 case 'X':
3324 if (c == 'i')
3325 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003326 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003327 temp = _PyString_FormatLong(v, flags,
3328 prec, c, &pbuf, &len);
3329 if (!temp)
3330 goto error;
3331 /* unbounded ints can always produce
3332 a sign character! */
3333 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003334 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003335 else {
3336 pbuf = formatbuf;
3337 len = formatint(pbuf, sizeof(formatbuf),
3338 flags, prec, c, v);
3339 if (len < 0)
3340 goto error;
3341 /* only d conversion is signed */
3342 sign = c == 'd';
3343 }
3344 if (flags & F_ZERO)
3345 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003346 break;
3347 case 'e':
3348 case 'E':
3349 case 'f':
3350 case 'g':
3351 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003352 pbuf = formatbuf;
3353 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003354 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003355 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003356 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003357 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003358 fill = '0';
3359 break;
3360 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003361 pbuf = formatbuf;
3362 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003363 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003364 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003365 break;
3366 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003367 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003368 "unsupported format character '%c' (0x%x) "
3369 "at index %i",
3370 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003371 goto error;
3372 }
3373 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003374 if (*pbuf == '-' || *pbuf == '+') {
3375 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003376 len--;
3377 }
3378 else if (flags & F_SIGN)
3379 sign = '+';
3380 else if (flags & F_BLANK)
3381 sign = ' ';
3382 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003383 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003384 }
3385 if (width < len)
3386 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003387 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003388 reslen -= rescnt;
3389 rescnt = width + fmtcnt + 100;
3390 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003391 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003392 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003393 res = PyString_AsString(result)
3394 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003395 }
3396 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003397 if (fill != ' ')
3398 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003399 rescnt--;
3400 if (width > len)
3401 width--;
3402 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003403 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3404 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003405 assert(pbuf[1] == c);
3406 if (fill != ' ') {
3407 *res++ = *pbuf++;
3408 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003409 }
Tim Petersfff53252001-04-12 18:38:48 +00003410 rescnt -= 2;
3411 width -= 2;
3412 if (width < 0)
3413 width = 0;
3414 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003415 }
3416 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003417 do {
3418 --rescnt;
3419 *res++ = fill;
3420 } while (--width > len);
3421 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003422 if (fill == ' ') {
3423 if (sign)
3424 *res++ = sign;
3425 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003426 (c == 'x' || c == 'X')) {
3427 assert(pbuf[0] == '0');
3428 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003429 *res++ = *pbuf++;
3430 *res++ = *pbuf++;
3431 }
3432 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003433 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003434 res += len;
3435 rescnt -= len;
3436 while (--width >= len) {
3437 --rescnt;
3438 *res++ = ' ';
3439 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003440 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003441 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003442 "not all arguments converted");
3443 goto error;
3444 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003446 } /* '%' */
3447 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003448 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003449 PyErr_SetString(PyExc_TypeError,
3450 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003451 goto error;
3452 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003453 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003454 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003455 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003456 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003457 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003458
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003459#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003460 unicode:
3461 if (args_owned) {
3462 Py_DECREF(args);
3463 args_owned = 0;
3464 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003465 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003466 if (PyTuple_Check(orig_args) && argidx > 0) {
3467 PyObject *v;
3468 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3469 v = PyTuple_New(n);
3470 if (v == NULL)
3471 goto error;
3472 while (--n >= 0) {
3473 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3474 Py_INCREF(w);
3475 PyTuple_SET_ITEM(v, n, w);
3476 }
3477 args = v;
3478 } else {
3479 Py_INCREF(orig_args);
3480 args = orig_args;
3481 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003482 args_owned = 1;
3483 /* Take what we have of the result and let the Unicode formatting
3484 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003485 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003486 if (_PyString_Resize(&result, rescnt))
3487 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003488 fmtcnt = PyString_GET_SIZE(format) - \
3489 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003490 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3491 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003492 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003493 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003494 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003495 if (v == NULL)
3496 goto error;
3497 /* Paste what we have (result) to what the Unicode formatting
3498 function returned (v) and return the result (or error) */
3499 w = PyUnicode_Concat(result, v);
3500 Py_DECREF(result);
3501 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003502 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003503 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003504#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003505
Guido van Rossume5372401993-03-16 12:15:04 +00003506 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003507 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003508 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003510 }
Guido van Rossume5372401993-03-16 12:15:04 +00003511 return NULL;
3512}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003513
3514
3515#ifdef INTERN_STRINGS
3516
Barry Warsaw4df762f2000-08-16 23:41:01 +00003517/* This dictionary will leak at PyString_Fini() time. That's acceptable
3518 * because PyString_Fini() specifically frees interned strings that are
3519 * only referenced by this dictionary. The CVS log entry for revision 2.45
3520 * says:
3521 *
3522 * Change the Fini function to only remove otherwise unreferenced
3523 * strings from the interned table. There are references in
3524 * hard-to-find static variables all over the interpreter, and it's not
3525 * worth trying to get rid of all those; but "uninterning" isn't fair
3526 * either and may cause subtle failures later -- so we have to keep them
3527 * in the interned table.
3528 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003529static PyObject *interned;
3530
3531void
Fred Drakeba096332000-07-09 07:04:36 +00003532PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003533{
3534 register PyStringObject *s = (PyStringObject *)(*p);
3535 PyObject *t;
3536 if (s == NULL || !PyString_Check(s))
3537 Py_FatalError("PyString_InternInPlace: strings only please!");
3538 if ((t = s->ob_sinterned) != NULL) {
3539 if (t == (PyObject *)s)
3540 return;
3541 Py_INCREF(t);
3542 *p = t;
3543 Py_DECREF(s);
3544 return;
3545 }
3546 if (interned == NULL) {
3547 interned = PyDict_New();
3548 if (interned == NULL)
3549 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003550 }
3551 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3552 Py_INCREF(t);
3553 *p = s->ob_sinterned = t;
3554 Py_DECREF(s);
3555 return;
3556 }
Tim Peters111f6092001-09-12 07:54:51 +00003557 /* Ensure that only true string objects appear in the intern dict,
3558 and as the value of ob_sinterned. */
3559 if (PyString_CheckExact(s)) {
3560 t = (PyObject *)s;
3561 if (PyDict_SetItem(interned, t, t) == 0) {
3562 s->ob_sinterned = t;
3563 return;
3564 }
3565 }
3566 else {
3567 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3568 PyString_GET_SIZE(s));
3569 if (t != NULL) {
3570 if (PyDict_SetItem(interned, t, t) == 0) {
3571 *p = s->ob_sinterned = t;
3572 Py_DECREF(s);
3573 return;
3574 }
3575 Py_DECREF(t);
3576 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003577 }
3578 PyErr_Clear();
3579}
3580
3581
3582PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003583PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003584{
3585 PyObject *s = PyString_FromString(cp);
3586 if (s == NULL)
3587 return NULL;
3588 PyString_InternInPlace(&s);
3589 return s;
3590}
3591
3592#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003593
3594void
Fred Drakeba096332000-07-09 07:04:36 +00003595PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003596{
3597 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003598 for (i = 0; i < UCHAR_MAX + 1; i++) {
3599 Py_XDECREF(characters[i]);
3600 characters[i] = NULL;
3601 }
3602#ifndef DONT_SHARE_SHORT_STRINGS
3603 Py_XDECREF(nullstring);
3604 nullstring = NULL;
3605#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003606#ifdef INTERN_STRINGS
3607 if (interned) {
3608 int pos, changed;
3609 PyObject *key, *value;
3610 do {
3611 changed = 0;
3612 pos = 0;
3613 while (PyDict_Next(interned, &pos, &key, &value)) {
3614 if (key->ob_refcnt == 2 && key == value) {
3615 PyDict_DelItem(interned, key);
3616 changed = 1;
3617 }
3618 }
3619 } while (changed);
3620 }
3621#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003622}
Barry Warsawa903ad982001-02-23 16:40:48 +00003623
3624#ifdef INTERN_STRINGS
3625void _Py_ReleaseInternedStrings(void)
3626{
3627 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003628 fprintf(stderr, "releasing interned strings\n");
3629 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003630 Py_DECREF(interned);
3631 interned = NULL;
3632 }
3633}
3634#endif /* INTERN_STRINGS */