blob: b220859a6294b9c278eeda6a0e6bccb0fc1bcae4 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
164 ;
165
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
172
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
195 */
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
207 }
208 } else
209 n++;
210 }
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
216
217 s = PyString_AsString(string);
218
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 }
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
241 }
242
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000272 /* %p is ill-defined: ensure leading 0x. */
273 if (s[1] == 'X')
274 s[1] = 'x';
275 else if (s[1] != 'x') {
276 memmove(s+2, s, strlen(s)+1);
277 s[0] = '0';
278 s[1] = 'x';
279 }
Barry Warsawdadace02001-08-24 18:32:06 +0000280 s += strlen(s);
281 break;
282 case '%':
283 *s++ = '%';
284 break;
285 default:
286 strcpy(s, p);
287 s += strlen(s);
288 goto end;
289 }
290 } else
291 *s++ = *f;
292 }
293
294 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000295 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000296 return string;
297}
298
299PyObject *
300PyString_FromFormat(const char *format, ...)
301{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000302 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000303 va_list vargs;
304
305#ifdef HAVE_STDARG_PROTOTYPES
306 va_start(vargs, format);
307#else
308 va_start(vargs);
309#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 ret = PyString_FromFormatV(format, vargs);
311 va_end(vargs);
312 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313}
314
315
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000316PyObject *PyString_Decode(const char *s,
317 int size,
318 const char *encoding,
319 const char *errors)
320{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000321 PyObject *v, *str;
322
323 str = PyString_FromStringAndSize(s, size);
324 if (str == NULL)
325 return NULL;
326 v = PyString_AsDecodedString(str, encoding, errors);
327 Py_DECREF(str);
328 return v;
329}
330
331PyObject *PyString_AsDecodedObject(PyObject *str,
332 const char *encoding,
333 const char *errors)
334{
335 PyObject *v;
336
337 if (!PyString_Check(str)) {
338 PyErr_BadArgument();
339 goto onError;
340 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000341
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000342 if (encoding == NULL) {
343#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000344 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000345#else
346 PyErr_SetString(PyExc_ValueError, "no encoding specified");
347 goto onError;
348#endif
349 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350
351 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000352 v = PyCodec_Decode(str, encoding, errors);
353 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000355
356 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000359 return NULL;
360}
361
362PyObject *PyString_AsDecodedString(PyObject *str,
363 const char *encoding,
364 const char *errors)
365{
366 PyObject *v;
367
368 v = PyString_AsDecodedObject(str, encoding, errors);
369 if (v == NULL)
370 goto onError;
371
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000372#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 /* Convert Unicode to a string using the default encoding */
374 if (PyUnicode_Check(v)) {
375 PyObject *temp = v;
376 v = PyUnicode_AsEncodedString(v, NULL, NULL);
377 Py_DECREF(temp);
378 if (v == NULL)
379 goto onError;
380 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000382 if (!PyString_Check(v)) {
383 PyErr_Format(PyExc_TypeError,
384 "decoder did not return a string object (type=%.400s)",
385 v->ob_type->tp_name);
386 Py_DECREF(v);
387 goto onError;
388 }
389
390 return v;
391
392 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 return NULL;
394}
395
396PyObject *PyString_Encode(const char *s,
397 int size,
398 const char *encoding,
399 const char *errors)
400{
401 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 str = PyString_FromStringAndSize(s, size);
404 if (str == NULL)
405 return NULL;
406 v = PyString_AsEncodedString(str, encoding, errors);
407 Py_DECREF(str);
408 return v;
409}
410
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000412 const char *encoding,
413 const char *errors)
414{
415 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000416
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 if (!PyString_Check(str)) {
418 PyErr_BadArgument();
419 goto onError;
420 }
421
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422 if (encoding == NULL) {
423#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000425#else
426 PyErr_SetString(PyExc_ValueError, "no encoding specified");
427 goto onError;
428#endif
429 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430
431 /* Encode via the codec registry */
432 v = PyCodec_Encode(str, encoding, errors);
433 if (v == NULL)
434 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000435
436 return v;
437
438 onError:
439 return NULL;
440}
441
442PyObject *PyString_AsEncodedString(PyObject *str,
443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v;
447
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000448 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449 if (v == NULL)
450 goto onError;
451
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 /* Convert Unicode to a string using the default encoding */
454 if (PyUnicode_Check(v)) {
455 PyObject *temp = v;
456 v = PyUnicode_AsEncodedString(v, NULL, NULL);
457 Py_DECREF(temp);
458 if (v == NULL)
459 goto onError;
460 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(v)) {
463 PyErr_Format(PyExc_TypeError,
464 "encoder did not return a string object (type=%.400s)",
465 v->ob_type->tp_name);
466 Py_DECREF(v);
467 goto onError;
468 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000469
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000471
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 onError:
473 return NULL;
474}
475
Guido van Rossum234f9421993-06-17 12:35:49 +0000476static void
Fred Drakeba096332000-07-09 07:04:36 +0000477string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000478{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000479 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000480}
481
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000482static int
483string_getsize(register PyObject *op)
484{
485 char *s;
486 int len;
487 if (PyString_AsStringAndSize(op, &s, &len))
488 return -1;
489 return len;
490}
491
492static /*const*/ char *
493string_getbuffer(register PyObject *op)
494{
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return NULL;
499 return s;
500}
501
Guido van Rossumd7047b31995-01-02 19:07:15 +0000502int
Fred Drakeba096332000-07-09 07:04:36 +0000503PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000505 if (!PyString_Check(op))
506 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508}
509
510/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000511PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000512{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000513 if (!PyString_Check(op))
514 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516}
517
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518int
519PyString_AsStringAndSize(register PyObject *obj,
520 register char **s,
521 register int *len)
522{
523 if (s == NULL) {
524 PyErr_BadInternalCall();
525 return -1;
526 }
527
528 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000529#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530 if (PyUnicode_Check(obj)) {
531 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
532 if (obj == NULL)
533 return -1;
534 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535 else
536#endif
537 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 PyErr_Format(PyExc_TypeError,
539 "expected string or Unicode object, "
540 "%.200s found", obj->ob_type->tp_name);
541 return -1;
542 }
543 }
544
545 *s = PyString_AS_STRING(obj);
546 if (len != NULL)
547 *len = PyString_GET_SIZE(obj);
548 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
549 PyErr_SetString(PyExc_TypeError,
550 "expected string without null bytes");
551 return -1;
552 }
553 return 0;
554}
555
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556/* Methods */
557
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000560{
561 int i;
562 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000563 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000567 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569
Thomas Wouters7e474022000-07-16 12:04:32 +0000570 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000571 quote = '\'';
572 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
573 quote = '"';
574
575 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576 for (i = 0; i < op->ob_size; i++) {
577 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000578 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000580 else if (c == '\t')
581 fprintf(fp, "\\t");
582 else if (c == '\n')
583 fprintf(fp, "\\n");
584 else if (c == '\r')
585 fprintf(fp, "\\r");
586 else if (c < ' ' || c >= 0x7f)
587 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000589 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593}
594
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000596string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000598 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
599 PyObject *v;
600 if (newsize > INT_MAX) {
601 PyErr_SetString(PyExc_OverflowError,
602 "string is too large to make repr");
603 }
604 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000606 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 }
608 else {
609 register int i;
610 register char c;
611 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000612 int quote;
613
Thomas Wouters7e474022000-07-16 12:04:32 +0000614 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000615 quote = '\'';
616 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
617 quote = '"';
618
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000619 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 for (i = 0; i < op->ob_size; i++) {
622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
631 else if (c < ' ' || c >= 0x7f) {
632 sprintf(p, "\\x%02x", c & 0xff);
633 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 }
635 else
636 *p++ = c;
637 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000638 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000640 _PyString_Resize(
641 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000642 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644}
645
Guido van Rossum189f1df2001-05-01 16:51:53 +0000646static PyObject *
647string_str(PyObject *s)
648{
649 Py_INCREF(s);
650 return s;
651}
652
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653static int
Fred Drakeba096332000-07-09 07:04:36 +0000654string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655{
656 return a->ob_size;
657}
658
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000660string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661{
662 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000663 register PyStringObject *op;
664 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000665#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000666 if (PyUnicode_Check(bb))
667 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000668#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000669 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000670 "cannot add type \"%.200s\" to string",
671 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672 return NULL;
673 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000676 if ((a->ob_size == 0 || b->ob_size == 0) &&
677 PyString_CheckExact(a) && PyString_CheckExact(b)) {
678 if (a->ob_size == 0) {
679 Py_INCREF(bb);
680 return bb;
681 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000682 Py_INCREF(a);
683 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000684 }
685 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000686 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000687 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000688 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000689 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000690 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000691 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000692#ifdef CACHE_HASH
693 op->ob_shash = -1;
694#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000695#ifdef INTERN_STRINGS
696 op->ob_sinterned = NULL;
697#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000698 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
699 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
700 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702#undef b
703}
704
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000705static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000706string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707{
708 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000709 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000710 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000711 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712 if (n < 0)
713 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
716 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000718 if (n && size / n != a->ob_size) {
719 PyErr_SetString(PyExc_OverflowError,
720 "repeated string is too long");
721 return NULL;
722 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000723 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 Py_INCREF(a);
725 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726 }
Tim Peters8f422462000-09-09 06:13:41 +0000727 nbytes = size * sizeof(char);
728 if (nbytes / sizeof(char) != (size_t)size ||
729 nbytes + sizeof(PyStringObject) <= nbytes) {
730 PyErr_SetString(PyExc_OverflowError,
731 "repeated string is too long");
732 return NULL;
733 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000734 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000735 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000736 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000738 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000739#ifdef CACHE_HASH
740 op->ob_shash = -1;
741#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000742#ifdef INTERN_STRINGS
743 op->ob_sinterned = NULL;
744#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000745 for (i = 0; i < size; i += a->ob_size)
746 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
747 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000748 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749}
750
751/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
752
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000753static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000754string_slice(register PyStringObject *a, register int i, register int j)
755 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756{
757 if (i < 0)
758 i = 0;
759 if (j < 0)
760 j = 0; /* Avoid signed/unsigned bug in next line */
761 if (j > a->ob_size)
762 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000763 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
764 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000765 Py_INCREF(a);
766 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767 }
768 if (j < i)
769 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000770 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771}
772
Guido van Rossum9284a572000-03-07 15:53:43 +0000773static int
Fred Drakeba096332000-07-09 07:04:36 +0000774string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000775{
776 register char *s, *end;
777 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000778#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000779 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000780 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000781#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000782 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000783 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000784 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000785 return -1;
786 }
787 c = PyString_AsString(el)[0];
788 s = PyString_AsString(a);
789 end = s + PyString_Size(a);
790 while (s < end) {
791 if (c == *s++)
792 return 1;
793 }
794 return 0;
795}
796
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000797static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000798string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000800 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000801 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000802 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000803 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804 return NULL;
805 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000806 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000807 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000808 if (v == NULL)
809 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000810 else {
811#ifdef COUNT_ALLOCS
812 one_strings++;
813#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000814 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000815 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000816 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817}
818
Martin v. Löwiscd353062001-05-24 16:56:35 +0000819static PyObject*
820string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000821{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000822 int c;
823 int len_a, len_b;
824 int min_len;
825 PyObject *result;
826
827 /* One of the objects is a string object. Make sure the
828 other one is one, too. */
829 if (a->ob_type != b->ob_type) {
830 result = Py_NotImplemented;
831 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000832 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000833 if (a == b) {
834 switch (op) {
835 case Py_EQ:case Py_LE:case Py_GE:
836 result = Py_True;
837 goto out;
838 case Py_NE:case Py_LT:case Py_GT:
839 result = Py_False;
840 goto out;
841 }
842 }
843 if (op == Py_EQ) {
844 /* Supporting Py_NE here as well does not save
845 much time, since Py_NE is rarely used. */
846 if (a->ob_size == b->ob_size
847 && (a->ob_sval[0] == b->ob_sval[0]
848 && memcmp(a->ob_sval, b->ob_sval,
849 a->ob_size) == 0)) {
850 result = Py_True;
851 } else {
852 result = Py_False;
853 }
854 goto out;
855 }
856 len_a = a->ob_size; len_b = b->ob_size;
857 min_len = (len_a < len_b) ? len_a : len_b;
858 if (min_len > 0) {
859 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
860 if (c==0)
861 c = memcmp(a->ob_sval, b->ob_sval, min_len);
862 }else
863 c = 0;
864 if (c == 0)
865 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
866 switch (op) {
867 case Py_LT: c = c < 0; break;
868 case Py_LE: c = c <= 0; break;
869 case Py_EQ: assert(0); break; /* unreachable */
870 case Py_NE: c = c != 0; break;
871 case Py_GT: c = c > 0; break;
872 case Py_GE: c = c >= 0; break;
873 default:
874 result = Py_NotImplemented;
875 goto out;
876 }
877 result = c ? Py_True : Py_False;
878 out:
879 Py_INCREF(result);
880 return result;
881}
882
883int
884_PyString_Eq(PyObject *o1, PyObject *o2)
885{
886 PyStringObject *a, *b;
887 a = (PyStringObject*)o1;
888 b = (PyStringObject*)o2;
889 return a->ob_size == b->ob_size
890 && *a->ob_sval == *b->ob_sval
891 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892}
893
Guido van Rossum9bfef441993-03-29 10:43:31 +0000894static long
Fred Drakeba096332000-07-09 07:04:36 +0000895string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000896{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000897 register int len;
898 register unsigned char *p;
899 register long x;
900
901#ifdef CACHE_HASH
902 if (a->ob_shash != -1)
903 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000904#ifdef INTERN_STRINGS
905 if (a->ob_sinterned != NULL)
906 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000908#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000909#endif
910 len = a->ob_size;
911 p = (unsigned char *) a->ob_sval;
912 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000913 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000914 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000915 x ^= a->ob_size;
916 if (x == -1)
917 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000918#ifdef CACHE_HASH
919 a->ob_shash = x;
920#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000921 return x;
922}
923
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000924static int
Fred Drakeba096332000-07-09 07:04:36 +0000925string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000926{
927 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000928 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000929 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000930 return -1;
931 }
932 *ptr = (void *)self->ob_sval;
933 return self->ob_size;
934}
935
936static int
Fred Drakeba096332000-07-09 07:04:36 +0000937string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000938{
Guido van Rossum045e6881997-09-08 18:30:11 +0000939 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000940 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000941 return -1;
942}
943
944static int
Fred Drakeba096332000-07-09 07:04:36 +0000945string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000946{
947 if ( lenp )
948 *lenp = self->ob_size;
949 return 1;
950}
951
Guido van Rossum1db70701998-10-08 02:18:52 +0000952static int
Fred Drakeba096332000-07-09 07:04:36 +0000953string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000954{
955 if ( index != 0 ) {
956 PyErr_SetString(PyExc_SystemError,
957 "accessing non-existent string segment");
958 return -1;
959 }
960 *ptr = self->ob_sval;
961 return self->ob_size;
962}
963
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000965 (inquiry)string_length, /*sq_length*/
966 (binaryfunc)string_concat, /*sq_concat*/
967 (intargfunc)string_repeat, /*sq_repeat*/
968 (intargfunc)string_item, /*sq_item*/
969 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000970 0, /*sq_ass_item*/
971 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000972 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973};
974
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000975static PyBufferProcs string_as_buffer = {
976 (getreadbufferproc)string_buffer_getreadbuf,
977 (getwritebufferproc)string_buffer_getwritebuf,
978 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000979 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000980};
981
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000982
983
/* Selectors for the striptype argument of do_strip(). */
#define LEFTSTRIP  0    /* remove leading whitespace only */
#define RIGHTSTRIP 1    /* remove trailing whitespace only */
#define BOTHSTRIP  2    /* remove whitespace at both ends */
987
988
989static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000990split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000991{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000992 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000993 PyObject* item;
994 PyObject *list = PyList_New(0);
995
996 if (list == NULL)
997 return NULL;
998
Guido van Rossum4c08d552000-03-10 22:55:18 +0000999 for (i = j = 0; i < len; ) {
1000 while (i < len && isspace(Py_CHARMASK(s[i])))
1001 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001002 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001003 while (i < len && !isspace(Py_CHARMASK(s[i])))
1004 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001005 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001006 if (maxsplit-- <= 0)
1007 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001008 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1009 if (item == NULL)
1010 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001011 err = PyList_Append(list, item);
1012 Py_DECREF(item);
1013 if (err < 0)
1014 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001015 while (i < len && isspace(Py_CHARMASK(s[i])))
1016 i++;
1017 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001018 }
1019 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001020 if (j < len) {
1021 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1022 if (item == NULL)
1023 goto finally;
1024 err = PyList_Append(list, item);
1025 Py_DECREF(item);
1026 if (err < 0)
1027 goto finally;
1028 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001029 return list;
1030 finally:
1031 Py_DECREF(list);
1032 return NULL;
1033}
1034
1035
1036static char split__doc__[] =
1037"S.split([sep [,maxsplit]]) -> list of strings\n\
1038\n\
1039Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001040delimiter string. If maxsplit is given, at most maxsplit\n\
1041splits are done. If sep is not specified, any whitespace string\n\
1042is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001043
1044static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001045string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001046{
1047 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001048 int maxsplit = -1;
1049 const char *s = PyString_AS_STRING(self), *sub;
1050 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001051
Guido van Rossum4c08d552000-03-10 22:55:18 +00001052 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001053 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001054 if (maxsplit < 0)
1055 maxsplit = INT_MAX;
1056 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001057 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001058 if (PyString_Check(subobj)) {
1059 sub = PyString_AS_STRING(subobj);
1060 n = PyString_GET_SIZE(subobj);
1061 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001062#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001063 else if (PyUnicode_Check(subobj))
1064 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001065#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001066 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1067 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001068 if (n == 0) {
1069 PyErr_SetString(PyExc_ValueError, "empty separator");
1070 return NULL;
1071 }
1072
1073 list = PyList_New(0);
1074 if (list == NULL)
1075 return NULL;
1076
1077 i = j = 0;
1078 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001079 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001080 if (maxsplit-- <= 0)
1081 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001082 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1083 if (item == NULL)
1084 goto fail;
1085 err = PyList_Append(list, item);
1086 Py_DECREF(item);
1087 if (err < 0)
1088 goto fail;
1089 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090 }
1091 else
1092 i++;
1093 }
1094 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1095 if (item == NULL)
1096 goto fail;
1097 err = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (err < 0)
1100 goto fail;
1101
1102 return list;
1103
1104 fail:
1105 Py_DECREF(list);
1106 return NULL;
1107}
1108
1109
1110static char join__doc__[] =
1111"S.join(sequence) -> string\n\
1112\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001113Return a string which is the concatenation of the strings in the\n\
1114sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001115
1116static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001117string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001118{
1119 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001120 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001121 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001122 char *p;
1123 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001124 size_t sz = 0;
1125 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001126 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001127
Tim Peters19fe14e2001-01-19 03:03:47 +00001128 seq = PySequence_Fast(orig, "");
1129 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001130 if (PyErr_ExceptionMatches(PyExc_TypeError))
1131 PyErr_Format(PyExc_TypeError,
1132 "sequence expected, %.80s found",
1133 orig->ob_type->tp_name);
1134 return NULL;
1135 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001136
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001137 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001138 if (seqlen == 0) {
1139 Py_DECREF(seq);
1140 return PyString_FromString("");
1141 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001143 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001144 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1145 PyErr_Format(PyExc_TypeError,
1146 "sequence item 0: expected string,"
1147 " %.80s found",
1148 item->ob_type->tp_name);
1149 Py_DECREF(seq);
1150 return NULL;
1151 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001152 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001153 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001154 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001155 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001156
Tim Peters19fe14e2001-01-19 03:03:47 +00001157 /* There are at least two things to join. Do a pre-pass to figure out
1158 * the total amount of space we'll need (sz), see whether any argument
1159 * is absurd, and defer to the Unicode join if appropriate.
1160 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001161 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001162 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001163 item = PySequence_Fast_GET_ITEM(seq, i);
1164 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001165#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001166 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001167 /* Defer to Unicode join.
1168 * CAUTION: There's no gurantee that the
1169 * original sequence can be iterated over
1170 * again, so we must pass seq here.
1171 */
1172 PyObject *result;
1173 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001174 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001175 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001176 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001177#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001178 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001179 "sequence item %i: expected string,"
1180 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001181 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001182 Py_DECREF(seq);
1183 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001184 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001185 sz += PyString_GET_SIZE(item);
1186 if (i != 0)
1187 sz += seplen;
1188 if (sz < old_sz || sz > INT_MAX) {
1189 PyErr_SetString(PyExc_OverflowError,
1190 "join() is too long for a Python string");
1191 Py_DECREF(seq);
1192 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001193 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001194 }
1195
1196 /* Allocate result space. */
1197 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1198 if (res == NULL) {
1199 Py_DECREF(seq);
1200 return NULL;
1201 }
1202
1203 /* Catenate everything. */
1204 p = PyString_AS_STRING(res);
1205 for (i = 0; i < seqlen; ++i) {
1206 size_t n;
1207 item = PySequence_Fast_GET_ITEM(seq, i);
1208 n = PyString_GET_SIZE(item);
1209 memcpy(p, PyString_AS_STRING(item), n);
1210 p += n;
1211 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001212 memcpy(p, sep, seplen);
1213 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001214 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001215 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001216
Jeremy Hylton49048292000-07-11 03:28:17 +00001217 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001218 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219}
1220
Tim Peters52e155e2001-06-16 05:42:57 +00001221PyObject *
1222_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001223{
Tim Petersa7259592001-06-16 05:11:17 +00001224 assert(sep != NULL && PyString_Check(sep));
1225 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001226 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001227}
1228
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001229static long
Fred Drakeba096332000-07-09 07:04:36 +00001230string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001232 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001233 int len = PyString_GET_SIZE(self);
1234 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001235 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001236
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001237 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001238 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001239 return -2;
1240 if (PyString_Check(subobj)) {
1241 sub = PyString_AS_STRING(subobj);
1242 n = PyString_GET_SIZE(subobj);
1243 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001244#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001245 else if (PyUnicode_Check(subobj))
1246 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001247#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001248 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001249 return -2;
1250
1251 if (last > len)
1252 last = len;
1253 if (last < 0)
1254 last += len;
1255 if (last < 0)
1256 last = 0;
1257 if (i < 0)
1258 i += len;
1259 if (i < 0)
1260 i = 0;
1261
Guido van Rossum4c08d552000-03-10 22:55:18 +00001262 if (dir > 0) {
1263 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001264 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001265 last -= n;
1266 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001267 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268 return (long)i;
1269 }
1270 else {
1271 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001272
Guido van Rossum4c08d552000-03-10 22:55:18 +00001273 if (n == 0 && i <= last)
1274 return (long)last;
1275 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001276 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001277 return (long)j;
1278 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001279
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280 return -1;
1281}
1282
1283
/* Docstring for S.find().  The range is written in slice notation,
   S[start:end]. */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1292
1293static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001294string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297 if (result == -2)
1298 return NULL;
1299 return PyInt_FromLong(result);
1300}
1301
1302
1303static char index__doc__[] =
1304"S.index(sub [,start [,end]]) -> int\n\
1305\n\
1306Like S.find() but raise ValueError when the substring is not found.";
1307
1308static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001309string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001311 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312 if (result == -2)
1313 return NULL;
1314 if (result == -1) {
1315 PyErr_SetString(PyExc_ValueError,
1316 "substring not found in string.index");
1317 return NULL;
1318 }
1319 return PyInt_FromLong(result);
1320}
1321
1322
/* Docstring for S.rfind().  The range is written in slice notation,
   S[start:end]. */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1331
1332static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001333string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 if (result == -2)
1337 return NULL;
1338 return PyInt_FromLong(result);
1339}
1340
1341
1342static char rindex__doc__[] =
1343"S.rindex(sub [,start [,end]]) -> int\n\
1344\n\
1345Like S.rfind() but raise ValueError when the substring is not found.";
1346
1347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001348string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001350 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351 if (result == -2)
1352 return NULL;
1353 if (result == -1) {
1354 PyErr_SetString(PyExc_ValueError,
1355 "substring not found in string.rindex");
1356 return NULL;
1357 }
1358 return PyInt_FromLong(result);
1359}
1360
1361
1362static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001363do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364{
1365 char *s = PyString_AS_STRING(self);
1366 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368 i = 0;
1369 if (striptype != RIGHTSTRIP) {
1370 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1371 i++;
1372 }
1373 }
1374
1375 j = len;
1376 if (striptype != LEFTSTRIP) {
1377 do {
1378 j--;
1379 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1380 j++;
1381 }
1382
Tim Peters8fa5dd02001-09-12 02:18:30 +00001383 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001384 Py_INCREF(self);
1385 return (PyObject*)self;
1386 }
1387 else
1388 return PyString_FromStringAndSize(s+i, j-i);
1389}
1390
1391
1392static char strip__doc__[] =
1393"S.strip() -> string\n\
1394\n\
1395Return a copy of the string S with leading and trailing\n\
1396whitespace removed.";
1397
1398static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001399string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001401 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402}
1403
1404
1405static char lstrip__doc__[] =
1406"S.lstrip() -> string\n\
1407\n\
1408Return a copy of the string S with leading whitespace removed.";
1409
1410static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001411string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001413 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414}
1415
1416
1417static char rstrip__doc__[] =
1418"S.rstrip() -> string\n\
1419\n\
1420Return a copy of the string S with trailing whitespace removed.";
1421
1422static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001423string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001425 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426}
1427
1428
1429static char lower__doc__[] =
1430"S.lower() -> string\n\
1431\n\
1432Return a copy of the string S converted to lowercase.";
1433
1434static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001435string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436{
1437 char *s = PyString_AS_STRING(self), *s_new;
1438 int i, n = PyString_GET_SIZE(self);
1439 PyObject *new;
1440
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 new = PyString_FromStringAndSize(NULL, n);
1442 if (new == NULL)
1443 return NULL;
1444 s_new = PyString_AsString(new);
1445 for (i = 0; i < n; i++) {
1446 int c = Py_CHARMASK(*s++);
1447 if (isupper(c)) {
1448 *s_new = tolower(c);
1449 } else
1450 *s_new = c;
1451 s_new++;
1452 }
1453 return new;
1454}
1455
1456
1457static char upper__doc__[] =
1458"S.upper() -> string\n\
1459\n\
1460Return a copy of the string S converted to uppercase.";
1461
1462static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001463string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001464{
1465 char *s = PyString_AS_STRING(self), *s_new;
1466 int i, n = PyString_GET_SIZE(self);
1467 PyObject *new;
1468
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469 new = PyString_FromStringAndSize(NULL, n);
1470 if (new == NULL)
1471 return NULL;
1472 s_new = PyString_AsString(new);
1473 for (i = 0; i < n; i++) {
1474 int c = Py_CHARMASK(*s++);
1475 if (islower(c)) {
1476 *s_new = toupper(c);
1477 } else
1478 *s_new = c;
1479 s_new++;
1480 }
1481 return new;
1482}
1483
1484
Guido van Rossum4c08d552000-03-10 22:55:18 +00001485static char title__doc__[] =
1486"S.title() -> string\n\
1487\n\
1488Return a titlecased version of S, i.e. words start with uppercase\n\
1489characters, all remaining cased characters have lowercase.";
1490
1491static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001492string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001493{
1494 char *s = PyString_AS_STRING(self), *s_new;
1495 int i, n = PyString_GET_SIZE(self);
1496 int previous_is_cased = 0;
1497 PyObject *new;
1498
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 new = PyString_FromStringAndSize(NULL, n);
1500 if (new == NULL)
1501 return NULL;
1502 s_new = PyString_AsString(new);
1503 for (i = 0; i < n; i++) {
1504 int c = Py_CHARMASK(*s++);
1505 if (islower(c)) {
1506 if (!previous_is_cased)
1507 c = toupper(c);
1508 previous_is_cased = 1;
1509 } else if (isupper(c)) {
1510 if (previous_is_cased)
1511 c = tolower(c);
1512 previous_is_cased = 1;
1513 } else
1514 previous_is_cased = 0;
1515 *s_new++ = c;
1516 }
1517 return new;
1518}
1519
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520static char capitalize__doc__[] =
1521"S.capitalize() -> string\n\
1522\n\
1523Return a copy of the string S with only its first character\n\
1524capitalized.";
1525
1526static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001527string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528{
1529 char *s = PyString_AS_STRING(self), *s_new;
1530 int i, n = PyString_GET_SIZE(self);
1531 PyObject *new;
1532
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533 new = PyString_FromStringAndSize(NULL, n);
1534 if (new == NULL)
1535 return NULL;
1536 s_new = PyString_AsString(new);
1537 if (0 < n) {
1538 int c = Py_CHARMASK(*s++);
1539 if (islower(c))
1540 *s_new = toupper(c);
1541 else
1542 *s_new = c;
1543 s_new++;
1544 }
1545 for (i = 1; i < n; i++) {
1546 int c = Py_CHARMASK(*s++);
1547 if (isupper(c))
1548 *s_new = tolower(c);
1549 else
1550 *s_new = c;
1551 s_new++;
1552 }
1553 return new;
1554}
1555
1556
1557static char count__doc__[] =
1558"S.count(sub[, start[, end]]) -> int\n\
1559\n\
1560Return the number of occurrences of substring sub in string\n\
1561S[start:end]. Optional arguments start and end are\n\
1562interpreted as in slice notation.";
1563
1564static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001565string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568 int len = PyString_GET_SIZE(self), n;
1569 int i = 0, last = INT_MAX;
1570 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572
Guido van Rossumc6821402000-05-08 14:08:05 +00001573 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1574 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001576
Guido van Rossum4c08d552000-03-10 22:55:18 +00001577 if (PyString_Check(subobj)) {
1578 sub = PyString_AS_STRING(subobj);
1579 n = PyString_GET_SIZE(subobj);
1580 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001581#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001582 else if (PyUnicode_Check(subobj)) {
1583 int count;
1584 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1585 if (count == -1)
1586 return NULL;
1587 else
1588 return PyInt_FromLong((long) count);
1589 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001590#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001591 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1592 return NULL;
1593
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594 if (last > len)
1595 last = len;
1596 if (last < 0)
1597 last += len;
1598 if (last < 0)
1599 last = 0;
1600 if (i < 0)
1601 i += len;
1602 if (i < 0)
1603 i = 0;
1604 m = last + 1 - n;
1605 if (n == 0)
1606 return PyInt_FromLong((long) (m-i));
1607
1608 r = 0;
1609 while (i < m) {
1610 if (!memcmp(s+i, sub, n)) {
1611 r++;
1612 i += n;
1613 } else {
1614 i++;
1615 }
1616 }
1617 return PyInt_FromLong((long) r);
1618}
1619
1620
1621static char swapcase__doc__[] =
1622"S.swapcase() -> string\n\
1623\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001624Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625converted to lowercase and vice versa.";
1626
1627static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001628string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629{
1630 char *s = PyString_AS_STRING(self), *s_new;
1631 int i, n = PyString_GET_SIZE(self);
1632 PyObject *new;
1633
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634 new = PyString_FromStringAndSize(NULL, n);
1635 if (new == NULL)
1636 return NULL;
1637 s_new = PyString_AsString(new);
1638 for (i = 0; i < n; i++) {
1639 int c = Py_CHARMASK(*s++);
1640 if (islower(c)) {
1641 *s_new = toupper(c);
1642 }
1643 else if (isupper(c)) {
1644 *s_new = tolower(c);
1645 }
1646 else
1647 *s_new = c;
1648 s_new++;
1649 }
1650 return new;
1651}
1652
1653
1654static char translate__doc__[] =
1655"S.translate(table [,deletechars]) -> string\n\
1656\n\
1657Return a copy of the string S, where all characters occurring\n\
1658in the optional argument deletechars are removed, and the\n\
1659remaining characters have been mapped through the given\n\
1660translation table, which must be a string of length 256.";
1661
1662static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001663string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001664{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665 register char *input, *output;
1666 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667 register int i, c, changed = 0;
1668 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001669 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670 int inlen, tablen, dellen = 0;
1671 PyObject *result;
1672 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674
Guido van Rossum4c08d552000-03-10 22:55:18 +00001675 if (!PyArg_ParseTuple(args, "O|O:translate",
1676 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001677 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001678
1679 if (PyString_Check(tableobj)) {
1680 table1 = PyString_AS_STRING(tableobj);
1681 tablen = PyString_GET_SIZE(tableobj);
1682 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001683#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001684 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001685 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001686 parameter; instead a mapping to None will cause characters
1687 to be deleted. */
1688 if (delobj != NULL) {
1689 PyErr_SetString(PyExc_TypeError,
1690 "deletions are implemented differently for unicode");
1691 return NULL;
1692 }
1693 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1694 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001695#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001696 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001697 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698
1699 if (delobj != NULL) {
1700 if (PyString_Check(delobj)) {
1701 del_table = PyString_AS_STRING(delobj);
1702 dellen = PyString_GET_SIZE(delobj);
1703 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001704#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001705 else if (PyUnicode_Check(delobj)) {
1706 PyErr_SetString(PyExc_TypeError,
1707 "deletions are implemented differently for unicode");
1708 return NULL;
1709 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001710#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001711 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1712 return NULL;
1713
1714 if (tablen != 256) {
1715 PyErr_SetString(PyExc_ValueError,
1716 "translation table must be 256 characters long");
1717 return NULL;
1718 }
1719 }
1720 else {
1721 del_table = NULL;
1722 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 }
1724
1725 table = table1;
1726 inlen = PyString_Size(input_obj);
1727 result = PyString_FromStringAndSize((char *)NULL, inlen);
1728 if (result == NULL)
1729 return NULL;
1730 output_start = output = PyString_AsString(result);
1731 input = PyString_AsString(input_obj);
1732
1733 if (dellen == 0) {
1734 /* If no deletions are required, use faster code */
1735 for (i = inlen; --i >= 0; ) {
1736 c = Py_CHARMASK(*input++);
1737 if (Py_CHARMASK((*output++ = table[c])) != c)
1738 changed = 1;
1739 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001740 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 return result;
1742 Py_DECREF(result);
1743 Py_INCREF(input_obj);
1744 return input_obj;
1745 }
1746
1747 for (i = 0; i < 256; i++)
1748 trans_table[i] = Py_CHARMASK(table[i]);
1749
1750 for (i = 0; i < dellen; i++)
1751 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1752
1753 for (i = inlen; --i >= 0; ) {
1754 c = Py_CHARMASK(*input++);
1755 if (trans_table[c] != -1)
1756 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1757 continue;
1758 changed = 1;
1759 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001760 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 Py_DECREF(result);
1762 Py_INCREF(input_obj);
1763 return input_obj;
1764 }
1765 /* Fix the size of the resulting string */
1766 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1767 return NULL;
1768 return result;
1769}
1770
1771
1772/* What follows is used for implementing replace(). Perry Stoll. */
1773
/*
   mymemfind

   A strstr() work-alike for arbitrary blocks of memory.

   Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
   bytes at PAT.  Returns the index into MEM of that occurrence, or -1
   if there is none.  A PAT longer than MEM can never match, so -1 is
   returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* highest index at which a complete pattern still fits in MEM */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before paying for the full memcmp() */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1799
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right:
       mem=1111  and pat=11 gives 2,
       mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int matches;

    for (matches = 0; len >= 0; ++matches) {
        const int where = mymemfind(mem, len, pat, pat_len);

        if (where == -1)
            break;
        /* resume the search just past the occurrence that was found */
        mem += where + pat_len;
        len -= where + pat_len;
    }
    return matches;
}
1823
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here with PyMem_MALLOC and returned; the caller must release it.

   COUNT limits the number of replacements; a negative COUNT means
   "replace every occurrence".

   PAT_LEN is expected to be positive; string_replace() rejects empty
   patterns before calling this function.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the length of the
       result would not fit in an int).

   return value is:
       the new string allocated locally, or
       STR itself (when out_len is -1), or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,         /* input string */
             const char *pat, int pat_len,     /* pattern string to find */
             const char *sub, int sub_len,     /* substitution string */
             int count,                        /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can push the result
       past INT_MAX; refuse that case instead of letting the signed
       arithmetic overflow, which would yield an undersized buffer that
       the memcpy() calls below would then overrun.  A non-positive delta
       cannot overflow because nfound*pat_len <= len. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1908
1909
1910static char replace__doc__[] =
1911"S.replace (old, new[, maxsplit]) -> string\n\
1912\n\
1913Return a copy of string S with all occurrences of substring\n\
1914old replaced by new. If the optional argument maxsplit is\n\
1915given, only the first maxsplit occurrences are replaced.";
1916
1917static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001918string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001920 const char *str = PyString_AS_STRING(self), *sub, *repl;
1921 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001922 const int len = PyString_GET_SIZE(self);
1923 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001924 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001926 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928 if (!PyArg_ParseTuple(args, "OO|i:replace",
1929 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001931
1932 if (PyString_Check(subobj)) {
1933 sub = PyString_AS_STRING(subobj);
1934 sub_len = PyString_GET_SIZE(subobj);
1935 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001936#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001937 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001938 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001939 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001940#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001941 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1942 return NULL;
1943
1944 if (PyString_Check(replobj)) {
1945 repl = PyString_AS_STRING(replobj);
1946 repl_len = PyString_GET_SIZE(replobj);
1947 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001948#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001950 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001951 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001952#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001953 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1954 return NULL;
1955
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001956 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001957 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958 return NULL;
1959 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001960 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961 if (new_s == NULL) {
1962 PyErr_NoMemory();
1963 return NULL;
1964 }
1965 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001966 if (PyString_CheckExact(self)) {
1967 /* we're returning another reference to self */
1968 new = (PyObject*)self;
1969 Py_INCREF(new);
1970 }
1971 else {
1972 new = PyString_FromStringAndSize(str, len);
1973 if (new == NULL)
1974 return NULL;
1975 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976 }
1977 else {
1978 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001979 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980 }
1981 return new;
1982}
1983
1984
1985static char startswith__doc__[] =
1986"S.startswith(prefix[, start[, end]]) -> int\n\
1987\n\
1988Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1989optional start, test S beginning at that position. With optional end, stop\n\
1990comparing S at that position.";
1991
1992static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001993string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001995 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001997 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998 int plen;
1999 int start = 0;
2000 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002001 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002
Guido van Rossumc6821402000-05-08 14:08:05 +00002003 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2004 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002005 return NULL;
2006 if (PyString_Check(subobj)) {
2007 prefix = PyString_AS_STRING(subobj);
2008 plen = PyString_GET_SIZE(subobj);
2009 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002010#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002011 else if (PyUnicode_Check(subobj)) {
2012 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002013 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002014 subobj, start, end, -1);
2015 if (rc == -1)
2016 return NULL;
2017 else
2018 return PyInt_FromLong((long) rc);
2019 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002020#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002021 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022 return NULL;
2023
2024 /* adopt Java semantics for index out of range. it is legal for
2025 * offset to be == plen, but this only returns true if prefix is
2026 * the empty string.
2027 */
2028 if (start < 0 || start+plen > len)
2029 return PyInt_FromLong(0);
2030
2031 if (!memcmp(str+start, prefix, plen)) {
2032 /* did the match end after the specified end? */
2033 if (end < 0)
2034 return PyInt_FromLong(1);
2035 else if (end - start < plen)
2036 return PyInt_FromLong(0);
2037 else
2038 return PyInt_FromLong(1);
2039 }
2040 else return PyInt_FromLong(0);
2041}
2042
2043
2044static char endswith__doc__[] =
2045"S.endswith(suffix[, start[, end]]) -> int\n\
2046\n\
2047Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2048optional start, test S beginning at that position. With optional end, stop\n\
2049comparing S at that position.";
2050
2051static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002052string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002054 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002055 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056 const char* suffix;
2057 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058 int start = 0;
2059 int end = -1;
2060 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062
Guido van Rossumc6821402000-05-08 14:08:05 +00002063 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2064 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065 return NULL;
2066 if (PyString_Check(subobj)) {
2067 suffix = PyString_AS_STRING(subobj);
2068 slen = PyString_GET_SIZE(subobj);
2069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002070#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002071 else if (PyUnicode_Check(subobj)) {
2072 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002073 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002074 subobj, start, end, +1);
2075 if (rc == -1)
2076 return NULL;
2077 else
2078 return PyInt_FromLong((long) rc);
2079 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002080#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002081 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082 return NULL;
2083
Guido van Rossum4c08d552000-03-10 22:55:18 +00002084 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085 return PyInt_FromLong(0);
2086
2087 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091 return PyInt_FromLong(1);
2092 else return PyInt_FromLong(0);
2093}
2094
2095
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002096static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002097"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002098\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002099Encodes S using the codec registered for encoding. encoding defaults\n\
2100to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002101handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2102a ValueError. Other possible values are 'ignore' and 'replace'.";
2103
2104static PyObject *
2105string_encode(PyStringObject *self, PyObject *args)
2106{
2107 char *encoding = NULL;
2108 char *errors = NULL;
2109 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2110 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002111 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2112}
2113
2114
2115static char decode__doc__[] =
2116"S.decode([encoding[,errors]]) -> object\n\
2117\n\
2118Decodes S using the codec registered for encoding. encoding defaults\n\
2119to the default encoding. errors may be given to set a different error\n\
2120handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2121a ValueError. Other possible values are 'ignore' and 'replace'.";
2122
2123static PyObject *
2124string_decode(PyStringObject *self, PyObject *args)
2125{
2126 char *encoding = NULL;
2127 char *errors = NULL;
2128 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2129 return NULL;
2130 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002131}
2132
2133
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134static char expandtabs__doc__[] =
2135"S.expandtabs([tabsize]) -> string\n\
2136\n\
2137Return a copy of S where all tab characters are expanded using spaces.\n\
2138If tabsize is not given, a tab size of 8 characters is assumed.";
2139
2140static PyObject*
2141string_expandtabs(PyStringObject *self, PyObject *args)
2142{
2143 const char *e, *p;
2144 char *q;
2145 int i, j;
2146 PyObject *u;
2147 int tabsize = 8;
2148
2149 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2150 return NULL;
2151
Thomas Wouters7e474022000-07-16 12:04:32 +00002152 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002153 i = j = 0;
2154 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2155 for (p = PyString_AS_STRING(self); p < e; p++)
2156 if (*p == '\t') {
2157 if (tabsize > 0)
2158 j += tabsize - (j % tabsize);
2159 }
2160 else {
2161 j++;
2162 if (*p == '\n' || *p == '\r') {
2163 i += j;
2164 j = 0;
2165 }
2166 }
2167
2168 /* Second pass: create output string and fill it */
2169 u = PyString_FromStringAndSize(NULL, i + j);
2170 if (!u)
2171 return NULL;
2172
2173 j = 0;
2174 q = PyString_AS_STRING(u);
2175
2176 for (p = PyString_AS_STRING(self); p < e; p++)
2177 if (*p == '\t') {
2178 if (tabsize > 0) {
2179 i = tabsize - (j % tabsize);
2180 j += i;
2181 while (i--)
2182 *q++ = ' ';
2183 }
2184 }
2185 else {
2186 j++;
2187 *q++ = *p;
2188 if (*p == '\n' || *p == '\r')
2189 j = 0;
2190 }
2191
2192 return u;
2193}
2194
Tim Peters8fa5dd02001-09-12 02:18:30 +00002195static PyObject *
2196pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197{
2198 PyObject *u;
2199
2200 if (left < 0)
2201 left = 0;
2202 if (right < 0)
2203 right = 0;
2204
Tim Peters8fa5dd02001-09-12 02:18:30 +00002205 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206 Py_INCREF(self);
2207 return (PyObject *)self;
2208 }
2209
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002210 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211 left + PyString_GET_SIZE(self) + right);
2212 if (u) {
2213 if (left)
2214 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002215 memcpy(PyString_AS_STRING(u) + left,
2216 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217 PyString_GET_SIZE(self));
2218 if (right)
2219 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2220 fill, right);
2221 }
2222
2223 return u;
2224}
2225
2226static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002227"S.ljust(width) -> string\n"
2228"\n"
2229"Return S left justified in a string of length width. Padding is\n"
2230"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231
2232static PyObject *
2233string_ljust(PyStringObject *self, PyObject *args)
2234{
2235 int width;
2236 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2237 return NULL;
2238
Tim Peters8fa5dd02001-09-12 02:18:30 +00002239 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002240 Py_INCREF(self);
2241 return (PyObject*) self;
2242 }
2243
2244 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2245}
2246
2247
2248static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002249"S.rjust(width) -> string\n"
2250"\n"
2251"Return S right justified in a string of length width. Padding is\n"
2252"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253
2254static PyObject *
2255string_rjust(PyStringObject *self, PyObject *args)
2256{
2257 int width;
2258 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2259 return NULL;
2260
Tim Peters8fa5dd02001-09-12 02:18:30 +00002261 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 Py_INCREF(self);
2263 return (PyObject*) self;
2264 }
2265
2266 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2267}
2268
2269
2270static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002271"S.center(width) -> string\n"
2272"\n"
2273"Return S centered in a string of length width. Padding is done\n"
2274"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002275
2276static PyObject *
2277string_center(PyStringObject *self, PyObject *args)
2278{
2279 int marg, left;
2280 int width;
2281
2282 if (!PyArg_ParseTuple(args, "i:center", &width))
2283 return NULL;
2284
Tim Peters8fa5dd02001-09-12 02:18:30 +00002285 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 Py_INCREF(self);
2287 return (PyObject*) self;
2288 }
2289
2290 marg = width - PyString_GET_SIZE(self);
2291 left = marg / 2 + (marg & width & 1);
2292
2293 return pad(self, left, marg - left, ' ');
2294}
2295
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002297"S.isspace() -> int\n"
2298"\n"
2299"Return 1 if there are only whitespace characters in S,\n"
2300"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301
2302static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002303string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304{
Fred Drakeba096332000-07-09 07:04:36 +00002305 register const unsigned char *p
2306 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002307 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 /* Shortcut for single character strings */
2310 if (PyString_GET_SIZE(self) == 1 &&
2311 isspace(*p))
2312 return PyInt_FromLong(1);
2313
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002314 /* Special case for empty strings */
2315 if (PyString_GET_SIZE(self) == 0)
2316 return PyInt_FromLong(0);
2317
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318 e = p + PyString_GET_SIZE(self);
2319 for (; p < e; p++) {
2320 if (!isspace(*p))
2321 return PyInt_FromLong(0);
2322 }
2323 return PyInt_FromLong(1);
2324}
2325
2326
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002327static char isalpha__doc__[] =
2328"S.isalpha() -> int\n\
2329\n\
2330Return 1 if all characters in S are alphabetic\n\
2331and there is at least one character in S, 0 otherwise.";
2332
2333static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002334string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002335{
Fred Drakeba096332000-07-09 07:04:36 +00002336 register const unsigned char *p
2337 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002338 register const unsigned char *e;
2339
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002340 /* Shortcut for single character strings */
2341 if (PyString_GET_SIZE(self) == 1 &&
2342 isalpha(*p))
2343 return PyInt_FromLong(1);
2344
2345 /* Special case for empty strings */
2346 if (PyString_GET_SIZE(self) == 0)
2347 return PyInt_FromLong(0);
2348
2349 e = p + PyString_GET_SIZE(self);
2350 for (; p < e; p++) {
2351 if (!isalpha(*p))
2352 return PyInt_FromLong(0);
2353 }
2354 return PyInt_FromLong(1);
2355}
2356
2357
2358static char isalnum__doc__[] =
2359"S.isalnum() -> int\n\
2360\n\
2361Return 1 if all characters in S are alphanumeric\n\
2362and there is at least one character in S, 0 otherwise.";
2363
2364static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002365string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002366{
Fred Drakeba096332000-07-09 07:04:36 +00002367 register const unsigned char *p
2368 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002369 register const unsigned char *e;
2370
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002371 /* Shortcut for single character strings */
2372 if (PyString_GET_SIZE(self) == 1 &&
2373 isalnum(*p))
2374 return PyInt_FromLong(1);
2375
2376 /* Special case for empty strings */
2377 if (PyString_GET_SIZE(self) == 0)
2378 return PyInt_FromLong(0);
2379
2380 e = p + PyString_GET_SIZE(self);
2381 for (; p < e; p++) {
2382 if (!isalnum(*p))
2383 return PyInt_FromLong(0);
2384 }
2385 return PyInt_FromLong(1);
2386}
2387
2388
Guido van Rossum4c08d552000-03-10 22:55:18 +00002389static char isdigit__doc__[] =
2390"S.isdigit() -> int\n\
2391\n\
2392Return 1 if there are only digit characters in S,\n\
23930 otherwise.";
2394
2395static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002396string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397{
Fred Drakeba096332000-07-09 07:04:36 +00002398 register const unsigned char *p
2399 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002400 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401
Guido van Rossum4c08d552000-03-10 22:55:18 +00002402 /* Shortcut for single character strings */
2403 if (PyString_GET_SIZE(self) == 1 &&
2404 isdigit(*p))
2405 return PyInt_FromLong(1);
2406
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002407 /* Special case for empty strings */
2408 if (PyString_GET_SIZE(self) == 0)
2409 return PyInt_FromLong(0);
2410
Guido van Rossum4c08d552000-03-10 22:55:18 +00002411 e = p + PyString_GET_SIZE(self);
2412 for (; p < e; p++) {
2413 if (!isdigit(*p))
2414 return PyInt_FromLong(0);
2415 }
2416 return PyInt_FromLong(1);
2417}
2418
2419
2420static char islower__doc__[] =
2421"S.islower() -> int\n\
2422\n\
2423Return 1 if all cased characters in S are lowercase and there is\n\
2424at least one cased character in S, 0 otherwise.";
2425
2426static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002427string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428{
Fred Drakeba096332000-07-09 07:04:36 +00002429 register const unsigned char *p
2430 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002431 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 int cased;
2433
Guido van Rossum4c08d552000-03-10 22:55:18 +00002434 /* Shortcut for single character strings */
2435 if (PyString_GET_SIZE(self) == 1)
2436 return PyInt_FromLong(islower(*p) != 0);
2437
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002438 /* Special case for empty strings */
2439 if (PyString_GET_SIZE(self) == 0)
2440 return PyInt_FromLong(0);
2441
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 e = p + PyString_GET_SIZE(self);
2443 cased = 0;
2444 for (; p < e; p++) {
2445 if (isupper(*p))
2446 return PyInt_FromLong(0);
2447 else if (!cased && islower(*p))
2448 cased = 1;
2449 }
2450 return PyInt_FromLong(cased);
2451}
2452
2453
2454static char isupper__doc__[] =
2455"S.isupper() -> int\n\
2456\n\
2457Return 1 if all cased characters in S are uppercase and there is\n\
2458at least one cased character in S, 0 otherwise.";
2459
2460static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002461string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002462{
Fred Drakeba096332000-07-09 07:04:36 +00002463 register const unsigned char *p
2464 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002465 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002466 int cased;
2467
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 /* Shortcut for single character strings */
2469 if (PyString_GET_SIZE(self) == 1)
2470 return PyInt_FromLong(isupper(*p) != 0);
2471
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002472 /* Special case for empty strings */
2473 if (PyString_GET_SIZE(self) == 0)
2474 return PyInt_FromLong(0);
2475
Guido van Rossum4c08d552000-03-10 22:55:18 +00002476 e = p + PyString_GET_SIZE(self);
2477 cased = 0;
2478 for (; p < e; p++) {
2479 if (islower(*p))
2480 return PyInt_FromLong(0);
2481 else if (!cased && isupper(*p))
2482 cased = 1;
2483 }
2484 return PyInt_FromLong(cased);
2485}
2486
2487
2488static char istitle__doc__[] =
2489"S.istitle() -> int\n\
2490\n\
2491Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2492may only follow uncased characters and lowercase characters only cased\n\
2493ones. Return 0 otherwise.";
2494
2495static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002496string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002497{
Fred Drakeba096332000-07-09 07:04:36 +00002498 register const unsigned char *p
2499 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002500 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002501 int cased, previous_is_cased;
2502
Guido van Rossum4c08d552000-03-10 22:55:18 +00002503 /* Shortcut for single character strings */
2504 if (PyString_GET_SIZE(self) == 1)
2505 return PyInt_FromLong(isupper(*p) != 0);
2506
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002507 /* Special case for empty strings */
2508 if (PyString_GET_SIZE(self) == 0)
2509 return PyInt_FromLong(0);
2510
Guido van Rossum4c08d552000-03-10 22:55:18 +00002511 e = p + PyString_GET_SIZE(self);
2512 cased = 0;
2513 previous_is_cased = 0;
2514 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002515 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516
2517 if (isupper(ch)) {
2518 if (previous_is_cased)
2519 return PyInt_FromLong(0);
2520 previous_is_cased = 1;
2521 cased = 1;
2522 }
2523 else if (islower(ch)) {
2524 if (!previous_is_cased)
2525 return PyInt_FromLong(0);
2526 previous_is_cased = 1;
2527 cased = 1;
2528 }
2529 else
2530 previous_is_cased = 0;
2531 }
2532 return PyInt_FromLong(cased);
2533}
2534
2535
/* Docstring for S.splitlines(); the optional argument is written in
   balanced brackets, "[keepends]". */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542
2543#define SPLIT_APPEND(data, left, right) \
2544 str = PyString_FromStringAndSize(data + left, right - left); \
2545 if (!str) \
2546 goto onError; \
2547 if (PyList_Append(list, str)) { \
2548 Py_DECREF(str); \
2549 goto onError; \
2550 } \
2551 else \
2552 Py_DECREF(str);
2553
2554static PyObject*
2555string_splitlines(PyStringObject *self, PyObject *args)
2556{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002557 register int i;
2558 register int j;
2559 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002560 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 PyObject *list;
2562 PyObject *str;
2563 char *data;
2564
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002565 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002566 return NULL;
2567
2568 data = PyString_AS_STRING(self);
2569 len = PyString_GET_SIZE(self);
2570
Guido van Rossum4c08d552000-03-10 22:55:18 +00002571 list = PyList_New(0);
2572 if (!list)
2573 goto onError;
2574
2575 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002576 int eol;
2577
Guido van Rossum4c08d552000-03-10 22:55:18 +00002578 /* Find a line and append it */
2579 while (i < len && data[i] != '\n' && data[i] != '\r')
2580 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002581
2582 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002583 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584 if (i < len) {
2585 if (data[i] == '\r' && i + 1 < len &&
2586 data[i+1] == '\n')
2587 i += 2;
2588 else
2589 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002590 if (keepends)
2591 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002592 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002593 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 j = i;
2595 }
2596 if (j < len) {
2597 SPLIT_APPEND(data, j, len);
2598 }
2599
2600 return list;
2601
2602 onError:
2603 Py_DECREF(list);
2604 return NULL;
2605}
2606
2607#undef SPLIT_APPEND
2608
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002609
/* Method table for the str type (installed as tp_methods of
   PyString_Type).  Each entry gives the Python-level method name, the
   C function implementing it, the METH_* flag describing how that
   function receives its arguments, and the method's docstring.  The
   table ends with an all-NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
    /* zfill is compiled out. */
#if 0
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
#endif
    {NULL, NULL} /* sentinel */
};
2652
Guido van Rossumae960af2001-08-30 03:11:59 +00002653staticforward PyObject *
2654str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2655
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002656static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002657string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002659 PyObject *x = NULL;
2660 static char *kwlist[] = {"object", 0};
2661
Guido van Rossumae960af2001-08-30 03:11:59 +00002662 if (type != &PyString_Type)
2663 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002664 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2665 return NULL;
2666 if (x == NULL)
2667 return PyString_FromString("");
2668 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002669}
2670
Guido van Rossumae960af2001-08-30 03:11:59 +00002671static PyObject *
2672str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2673{
2674 PyObject *tmp, *new;
2675 int n;
2676
2677 assert(PyType_IsSubtype(type, &PyString_Type));
2678 tmp = string_new(&PyString_Type, args, kwds);
2679 if (tmp == NULL)
2680 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002681 assert(PyString_CheckExact(tmp));
Guido van Rossumae960af2001-08-30 03:11:59 +00002682 new = type->tp_alloc(type, n = PyString_GET_SIZE(tmp));
Guido van Rossum29d55a32001-08-31 16:11:15 +00002683 if (new != NULL)
2684 memcpy(PyString_AS_STRING(new), PyString_AS_STRING(tmp), n+1);
2685 Py_DECREF(tmp);
Guido van Rossumae960af2001-08-30 03:11:59 +00002686 return new;
2687}
2688
/* Docstring of the str type itself (used as tp_doc of PyString_Type). */
static char string_doc[] =
    "str(object) -> string\n"
    "\n"
    "Return a nice string representation of the object.\n"
    "If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002694
/* Type object for Python's str type.  Slots set to 0 are not
   implemented here.  Methods come from string_methods, instances are
   created through string_new (which also serves subtypes), and
   Py_TPFLAGS_BASETYPE marks the type as usable as a base type. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                  /* ob_size */
    "str",                              /* tp_name */
    sizeof(PyStringObject),             /* tp_basicsize */
    sizeof(char),                       /* tp_itemsize */
    (destructor)string_dealloc,         /* tp_dealloc */
    (printfunc)string_print,            /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)string_repr,              /* tp_repr */
    0,                                  /* tp_as_number */
    &string_as_sequence,                /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    (hashfunc)string_hash,              /* tp_hash */
    0,                                  /* tp_call */
    (reprfunc)string_str,               /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &string_as_buffer,                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    string_doc,                         /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)string_richcompare,    /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    string_methods,                     /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    0,                                  /* tp_init */
    0,                                  /* tp_alloc */
    string_new,                         /* tp_new */
};
2736
2737void
Fred Drakeba096332000-07-09 07:04:36 +00002738PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002739{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002740 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002741 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002742 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002743 if (w == NULL || !PyString_Check(*pv)) {
2744 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002745 *pv = NULL;
2746 return;
2747 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002748 v = string_concat((PyStringObject *) *pv, w);
2749 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002750 *pv = v;
2751}
2752
Guido van Rossum013142a1994-08-30 08:19:36 +00002753void
Fred Drakeba096332000-07-09 07:04:36 +00002754PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002755{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 PyString_Concat(pv, w);
2757 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002758}
2759
2760
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002761/* The following function breaks the notion that strings are immutable:
2762 it changes the size of a string. We get away with this only if there
2763 is only one module referencing the object. You can also think of it
2764 as creating a new string object and destroying the old one, only
2765 more efficiently. In any case, don't use this if the string may
2766 already be known to some other part of the code... */
2767
2768int
Fred Drakeba096332000-07-09 07:04:36 +00002769_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002770{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002771 register PyObject *v;
2772 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002773 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002774 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002775 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002776 Py_DECREF(v);
2777 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002778 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002779 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002780 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002781#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002782 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002783#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002784 _Py_ForgetReference(v);
2785 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002786 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002787 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002788 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002789 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002791 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002792 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002793 _Py_NewReference(*pv);
2794 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002795 sv->ob_size = newsize;
2796 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002797 return 0;
2798}
Guido van Rossume5372401993-03-16 12:15:04 +00002799
2800/* Helpers for formatstring */
2801
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002802static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002803getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002804{
2805 int argidx = *p_argidx;
2806 if (argidx < arglen) {
2807 (*p_argidx)++;
2808 if (arglen < 0)
2809 return args;
2810 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002812 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002813 PyErr_SetString(PyExc_TypeError,
2814 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002815 return NULL;
2816}
2817
/* Bit flags recording which format flag characters were seen:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2830
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002831static int
Fred Drakeba096332000-07-09 07:04:36 +00002832formatfloat(char *buf, size_t buflen, int flags,
2833 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002834{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002835 /* fmt = '%#.' + `prec` + `type`
2836 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002837 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002838 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002839 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002840 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002841 if (prec < 0)
2842 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002843 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2844 type = 'g';
2845 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002846 /* worst case length calc to ensure no buffer overrun:
2847 fmt = %#.<prec>g
2848 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002849 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002850 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2851 If prec=0 the effective precision is 1 (the leading digit is
2852 always given), therefore increase by one to 10+prec. */
2853 if (buflen <= (size_t)10 + (size_t)prec) {
2854 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002855 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002856 return -1;
2857 }
Guido van Rossume5372401993-03-16 12:15:04 +00002858 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002859 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002860}
2861
Tim Peters38fd5b62000-09-21 05:43:11 +00002862/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2863 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2864 * Python's regular ints.
2865 * Return value: a new PyString*, or NULL if error.
2866 * . *pbuf is set to point into it,
2867 * *plen set to the # of chars following that.
2868 * Caller must decref it when done using pbuf.
2869 * The string starting at *pbuf is of the form
2870 * "-"? ("0x" | "0X")? digit+
2871 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002872 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002873 * There will be at least prec digits, zero-filled on the left if
2874 * necessary to get that many.
2875 * val object to be converted
2876 * flags bitmask of format flags; only F_ALT is looked at
2877 * prec minimum number of digits; 0-fill on left if needed
2878 * type a character in [duoxX]; u acts the same as d
2879 *
2880 * CAUTION: o, x and X conversions on regular ints can never
2881 * produce a '-' sign, but can for Python's unbounded ints.
2882 */
2883PyObject*
2884_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2885 char **pbuf, int *plen)
2886{
2887 PyObject *result = NULL;
2888 char *buf;
2889 int i;
2890 int sign; /* 1 if '-', else 0 */
2891 int len; /* number of characters */
2892 int numdigits; /* len == numnondigits + numdigits */
2893 int numnondigits = 0;
2894
2895 switch (type) {
2896 case 'd':
2897 case 'u':
2898 result = val->ob_type->tp_str(val);
2899 break;
2900 case 'o':
2901 result = val->ob_type->tp_as_number->nb_oct(val);
2902 break;
2903 case 'x':
2904 case 'X':
2905 numnondigits = 2;
2906 result = val->ob_type->tp_as_number->nb_hex(val);
2907 break;
2908 default:
2909 assert(!"'type' not in [duoxX]");
2910 }
2911 if (!result)
2912 return NULL;
2913
2914 /* To modify the string in-place, there can only be one reference. */
2915 if (result->ob_refcnt != 1) {
2916 PyErr_BadInternalCall();
2917 return NULL;
2918 }
2919 buf = PyString_AsString(result);
2920 len = PyString_Size(result);
2921 if (buf[len-1] == 'L') {
2922 --len;
2923 buf[len] = '\0';
2924 }
2925 sign = buf[0] == '-';
2926 numnondigits += sign;
2927 numdigits = len - numnondigits;
2928 assert(numdigits > 0);
2929
Tim Petersfff53252001-04-12 18:38:48 +00002930 /* Get rid of base marker unless F_ALT */
2931 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002932 /* Need to skip 0x, 0X or 0. */
2933 int skipped = 0;
2934 switch (type) {
2935 case 'o':
2936 assert(buf[sign] == '0');
2937 /* If 0 is only digit, leave it alone. */
2938 if (numdigits > 1) {
2939 skipped = 1;
2940 --numdigits;
2941 }
2942 break;
2943 case 'x':
2944 case 'X':
2945 assert(buf[sign] == '0');
2946 assert(buf[sign + 1] == 'x');
2947 skipped = 2;
2948 numnondigits -= 2;
2949 break;
2950 }
2951 if (skipped) {
2952 buf += skipped;
2953 len -= skipped;
2954 if (sign)
2955 buf[0] = '-';
2956 }
2957 assert(len == numnondigits + numdigits);
2958 assert(numdigits > 0);
2959 }
2960
2961 /* Fill with leading zeroes to meet minimum width. */
2962 if (prec > numdigits) {
2963 PyObject *r1 = PyString_FromStringAndSize(NULL,
2964 numnondigits + prec);
2965 char *b1;
2966 if (!r1) {
2967 Py_DECREF(result);
2968 return NULL;
2969 }
2970 b1 = PyString_AS_STRING(r1);
2971 for (i = 0; i < numnondigits; ++i)
2972 *b1++ = *buf++;
2973 for (i = 0; i < prec - numdigits; i++)
2974 *b1++ = '0';
2975 for (i = 0; i < numdigits; i++)
2976 *b1++ = *buf++;
2977 *b1 = '\0';
2978 Py_DECREF(result);
2979 result = r1;
2980 buf = PyString_AS_STRING(result);
2981 len = numnondigits + prec;
2982 }
2983
2984 /* Fix up case for hex conversions. */
2985 switch (type) {
2986 case 'x':
2987 /* Need to convert all upper case letters to lower case. */
2988 for (i = 0; i < len; i++)
2989 if (buf[i] >= 'A' && buf[i] <= 'F')
2990 buf[i] += 'a'-'A';
2991 break;
2992 case 'X':
2993 /* Need to convert 0x to 0X (and -0x to -0X). */
2994 if (buf[sign + 1] == 'x')
2995 buf[sign + 1] = 'X';
2996 break;
2997 }
2998 *pbuf = buf;
2999 *plen = len;
3000 return result;
3001}
3002
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003003static int
Fred Drakeba096332000-07-09 07:04:36 +00003004formatint(char *buf, size_t buflen, int flags,
3005 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003006{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003007 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003008 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3009 + 1 + 1 = 24 */
3010 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003011 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003012 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003013 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003014 if (prec < 0)
3015 prec = 1;
3016 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003017 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003018 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003019 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003020 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003021 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003022 return -1;
3023 }
Guido van Rossume5372401993-03-16 12:15:04 +00003024 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003025 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3026 * but we want it (for consistency with other %#x conversions, and
3027 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003028 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3029 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3030 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003031 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003032 if (x == 0 &&
3033 (flags & F_ALT) &&
3034 (type == 'x' || type == 'X') &&
3035 buf[1] != (char)type) /* this last always true under std C */
3036 {
Tim Petersfff53252001-04-12 18:38:48 +00003037 memmove(buf+2, buf, strlen(buf) + 1);
3038 buf[0] = '0';
3039 buf[1] = (char)type;
3040 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003041 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003042}
3043
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003044static int
Fred Drakeba096332000-07-09 07:04:36 +00003045formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003046{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003047 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003048 if (PyString_Check(v)) {
3049 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003050 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003051 }
3052 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003053 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003054 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003055 }
3056 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003057 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003058}
3059
Guido van Rossum013142a1994-08-30 08:19:36 +00003060
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesized as a whole so that the cast cannot
   combine with operators next to the macro's point of use.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003070
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003071PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003072PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003073{
3074 char *fmt, *res;
3075 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003076 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003077 PyObject *result, *orig_args;
3078#ifdef Py_USING_UNICODE
3079 PyObject *v, *w;
3080#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003081 PyObject *dict = NULL;
3082 if (format == NULL || !PyString_Check(format) || args == NULL) {
3083 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003084 return NULL;
3085 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003086 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003087 fmt = PyString_AsString(format);
3088 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003089 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003090 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003091 if (result == NULL)
3092 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003093 res = PyString_AsString(result);
3094 if (PyTuple_Check(args)) {
3095 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003096 argidx = 0;
3097 }
3098 else {
3099 arglen = -1;
3100 argidx = -2;
3101 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003102 if (args->ob_type->tp_as_mapping)
3103 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003104 while (--fmtcnt >= 0) {
3105 if (*fmt != '%') {
3106 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003107 rescnt = fmtcnt + 100;
3108 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003109 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003110 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003111 res = PyString_AsString(result)
3112 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003113 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003114 }
3115 *res++ = *fmt++;
3116 }
3117 else {
3118 /* Got a format specifier */
3119 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003120 int width = -1;
3121 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003122 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003123 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003124 PyObject *v = NULL;
3125 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003126 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003127 int sign;
3128 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003129 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003130#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003131 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003132 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003133#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003134
Guido van Rossumda9c2711996-12-05 21:58:58 +00003135 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003136 if (*fmt == '(') {
3137 char *keystart;
3138 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003139 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003140 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003141
3142 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003143 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003144 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003145 goto error;
3146 }
3147 ++fmt;
3148 --fmtcnt;
3149 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003150 /* Skip over balanced parentheses */
3151 while (pcount > 0 && --fmtcnt >= 0) {
3152 if (*fmt == ')')
3153 --pcount;
3154 else if (*fmt == '(')
3155 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003156 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003157 }
3158 keylen = fmt - keystart - 1;
3159 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003160 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003161 "incomplete format key");
3162 goto error;
3163 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003164 key = PyString_FromStringAndSize(keystart,
3165 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003166 if (key == NULL)
3167 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003168 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003169 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003170 args_owned = 0;
3171 }
3172 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003173 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003174 if (args == NULL) {
3175 goto error;
3176 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003177 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003178 arglen = -1;
3179 argidx = -2;
3180 }
Guido van Rossume5372401993-03-16 12:15:04 +00003181 while (--fmtcnt >= 0) {
3182 switch (c = *fmt++) {
3183 case '-': flags |= F_LJUST; continue;
3184 case '+': flags |= F_SIGN; continue;
3185 case ' ': flags |= F_BLANK; continue;
3186 case '#': flags |= F_ALT; continue;
3187 case '0': flags |= F_ZERO; continue;
3188 }
3189 break;
3190 }
3191 if (c == '*') {
3192 v = getnextarg(args, arglen, &argidx);
3193 if (v == NULL)
3194 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003195 if (!PyInt_Check(v)) {
3196 PyErr_SetString(PyExc_TypeError,
3197 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003198 goto error;
3199 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003200 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003201 if (width < 0) {
3202 flags |= F_LJUST;
3203 width = -width;
3204 }
Guido van Rossume5372401993-03-16 12:15:04 +00003205 if (--fmtcnt >= 0)
3206 c = *fmt++;
3207 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003208 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003209 width = c - '0';
3210 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003211 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003212 if (!isdigit(c))
3213 break;
3214 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003215 PyErr_SetString(
3216 PyExc_ValueError,
3217 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003218 goto error;
3219 }
3220 width = width*10 + (c - '0');
3221 }
3222 }
3223 if (c == '.') {
3224 prec = 0;
3225 if (--fmtcnt >= 0)
3226 c = *fmt++;
3227 if (c == '*') {
3228 v = getnextarg(args, arglen, &argidx);
3229 if (v == NULL)
3230 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003231 if (!PyInt_Check(v)) {
3232 PyErr_SetString(
3233 PyExc_TypeError,
3234 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003235 goto error;
3236 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003237 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003238 if (prec < 0)
3239 prec = 0;
3240 if (--fmtcnt >= 0)
3241 c = *fmt++;
3242 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003243 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003244 prec = c - '0';
3245 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003246 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003247 if (!isdigit(c))
3248 break;
3249 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003250 PyErr_SetString(
3251 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003252 "prec too big");
3253 goto error;
3254 }
3255 prec = prec*10 + (c - '0');
3256 }
3257 }
3258 } /* prec */
3259 if (fmtcnt >= 0) {
3260 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003261 if (--fmtcnt >= 0)
3262 c = *fmt++;
3263 }
3264 }
3265 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003266 PyErr_SetString(PyExc_ValueError,
3267 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003268 goto error;
3269 }
3270 if (c != '%') {
3271 v = getnextarg(args, arglen, &argidx);
3272 if (v == NULL)
3273 goto error;
3274 }
3275 sign = 0;
3276 fill = ' ';
3277 switch (c) {
3278 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003279 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003280 len = 1;
3281 break;
3282 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003283 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003284#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003285 if (PyUnicode_Check(v)) {
3286 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003287 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003288 goto unicode;
3289 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003290#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003291 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003292 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003293 else
3294 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003295 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003296 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003297 if (!PyString_Check(temp)) {
3298 PyErr_SetString(PyExc_TypeError,
3299 "%s argument has non-string str()");
3300 goto error;
3301 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003302 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003304 if (prec >= 0 && len > prec)
3305 len = prec;
3306 break;
3307 case 'i':
3308 case 'd':
3309 case 'u':
3310 case 'o':
3311 case 'x':
3312 case 'X':
3313 if (c == 'i')
3314 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003315 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003316 temp = _PyString_FormatLong(v, flags,
3317 prec, c, &pbuf, &len);
3318 if (!temp)
3319 goto error;
3320 /* unbounded ints can always produce
3321 a sign character! */
3322 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003323 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003324 else {
3325 pbuf = formatbuf;
3326 len = formatint(pbuf, sizeof(formatbuf),
3327 flags, prec, c, v);
3328 if (len < 0)
3329 goto error;
3330 /* only d conversion is signed */
3331 sign = c == 'd';
3332 }
3333 if (flags & F_ZERO)
3334 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003335 break;
3336 case 'e':
3337 case 'E':
3338 case 'f':
3339 case 'g':
3340 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003341 pbuf = formatbuf;
3342 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003343 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003344 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003345 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003346 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003347 fill = '0';
3348 break;
3349 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003350 pbuf = formatbuf;
3351 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003352 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003353 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003354 break;
3355 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003356 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003357 "unsupported format character '%c' (0x%x) "
3358 "at index %i",
3359 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003360 goto error;
3361 }
3362 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003363 if (*pbuf == '-' || *pbuf == '+') {
3364 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003365 len--;
3366 }
3367 else if (flags & F_SIGN)
3368 sign = '+';
3369 else if (flags & F_BLANK)
3370 sign = ' ';
3371 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003372 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003373 }
3374 if (width < len)
3375 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003376 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003377 reslen -= rescnt;
3378 rescnt = width + fmtcnt + 100;
3379 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003380 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003381 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003382 res = PyString_AsString(result)
3383 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003384 }
3385 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003386 if (fill != ' ')
3387 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003388 rescnt--;
3389 if (width > len)
3390 width--;
3391 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003392 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3393 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003394 assert(pbuf[1] == c);
3395 if (fill != ' ') {
3396 *res++ = *pbuf++;
3397 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003398 }
Tim Petersfff53252001-04-12 18:38:48 +00003399 rescnt -= 2;
3400 width -= 2;
3401 if (width < 0)
3402 width = 0;
3403 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003404 }
3405 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003406 do {
3407 --rescnt;
3408 *res++ = fill;
3409 } while (--width > len);
3410 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003411 if (fill == ' ') {
3412 if (sign)
3413 *res++ = sign;
3414 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003415 (c == 'x' || c == 'X')) {
3416 assert(pbuf[0] == '0');
3417 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003418 *res++ = *pbuf++;
3419 *res++ = *pbuf++;
3420 }
3421 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003422 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003423 res += len;
3424 rescnt -= len;
3425 while (--width >= len) {
3426 --rescnt;
3427 *res++ = ' ';
3428 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003429 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003430 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003431 "not all arguments converted");
3432 goto error;
3433 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003434 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003435 } /* '%' */
3436 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003437 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003438 PyErr_SetString(PyExc_TypeError,
3439 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003440 goto error;
3441 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003442 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003443 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003444 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003446 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003447
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003448#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003449 unicode:
3450 if (args_owned) {
3451 Py_DECREF(args);
3452 args_owned = 0;
3453 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003454 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003455 if (PyTuple_Check(orig_args) && argidx > 0) {
3456 PyObject *v;
3457 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3458 v = PyTuple_New(n);
3459 if (v == NULL)
3460 goto error;
3461 while (--n >= 0) {
3462 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3463 Py_INCREF(w);
3464 PyTuple_SET_ITEM(v, n, w);
3465 }
3466 args = v;
3467 } else {
3468 Py_INCREF(orig_args);
3469 args = orig_args;
3470 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003471 args_owned = 1;
3472 /* Take what we have of the result and let the Unicode formatting
3473 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003474 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003475 if (_PyString_Resize(&result, rescnt))
3476 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003477 fmtcnt = PyString_GET_SIZE(format) - \
3478 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003479 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3480 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003481 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003482 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003483 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003484 if (v == NULL)
3485 goto error;
3486 /* Paste what we have (result) to what the Unicode formatting
3487 function returned (v) and return the result (or error) */
3488 w = PyUnicode_Concat(result, v);
3489 Py_DECREF(result);
3490 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003491 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003492 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003493#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003494
Guido van Rossume5372401993-03-16 12:15:04 +00003495 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003496 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003497 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003498 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003499 }
Guido van Rossume5372401993-03-16 12:15:04 +00003500 return NULL;
3501}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003502
3503
3504#ifdef INTERN_STRINGS
3505
Barry Warsaw4df762f2000-08-16 23:41:01 +00003506/* This dictionary will leak at PyString_Fini() time. That's acceptable
3507 * because PyString_Fini() specifically frees interned strings that are
3508 * only referenced by this dictionary. The CVS log entry for revision 2.45
3509 * says:
3510 *
3511 * Change the Fini function to only remove otherwise unreferenced
3512 * strings from the interned table. There are references in
3513 * hard-to-find static variables all over the interpreter, and it's not
3514 * worth trying to get rid of all those; but "uninterning" isn't fair
3515 * either and may cause subtle failures later -- so we have to keep them
3516 * in the interned table.
3517 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003518static PyObject *interned;
3519
3520void
Fred Drakeba096332000-07-09 07:04:36 +00003521PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003522{
3523 register PyStringObject *s = (PyStringObject *)(*p);
3524 PyObject *t;
3525 if (s == NULL || !PyString_Check(s))
3526 Py_FatalError("PyString_InternInPlace: strings only please!");
3527 if ((t = s->ob_sinterned) != NULL) {
3528 if (t == (PyObject *)s)
3529 return;
3530 Py_INCREF(t);
3531 *p = t;
3532 Py_DECREF(s);
3533 return;
3534 }
3535 if (interned == NULL) {
3536 interned = PyDict_New();
3537 if (interned == NULL)
3538 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003539 }
3540 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3541 Py_INCREF(t);
3542 *p = s->ob_sinterned = t;
3543 Py_DECREF(s);
3544 return;
3545 }
3546 t = (PyObject *)s;
3547 if (PyDict_SetItem(interned, t, t) == 0) {
3548 s->ob_sinterned = t;
3549 return;
3550 }
3551 PyErr_Clear();
3552}
3553
3554
3555PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003556PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003557{
3558 PyObject *s = PyString_FromString(cp);
3559 if (s == NULL)
3560 return NULL;
3561 PyString_InternInPlace(&s);
3562 return s;
3563}
3564
3565#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003566
3567void
Fred Drakeba096332000-07-09 07:04:36 +00003568PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003569{
3570 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003571 for (i = 0; i < UCHAR_MAX + 1; i++) {
3572 Py_XDECREF(characters[i]);
3573 characters[i] = NULL;
3574 }
3575#ifndef DONT_SHARE_SHORT_STRINGS
3576 Py_XDECREF(nullstring);
3577 nullstring = NULL;
3578#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003579#ifdef INTERN_STRINGS
3580 if (interned) {
3581 int pos, changed;
3582 PyObject *key, *value;
3583 do {
3584 changed = 0;
3585 pos = 0;
3586 while (PyDict_Next(interned, &pos, &key, &value)) {
3587 if (key->ob_refcnt == 2 && key == value) {
3588 PyDict_DelItem(interned, key);
3589 changed = 1;
3590 }
3591 }
3592 } while (changed);
3593 }
3594#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003595}
Barry Warsawa903ad982001-02-23 16:40:48 +00003596
3597#ifdef INTERN_STRINGS
3598void _Py_ReleaseInternedStrings(void)
3599{
3600 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003601 fprintf(stderr, "releasing interned strings\n");
3602 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003603 Py_DECREF(interned);
3604 interned = NULL;
3605 }
3606}
3607#endif /* INTERN_STRINGS */