blob: 13328812bbd2a56a1636c40c44c4d4937ddf7117 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
164 ;
165
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
172
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
195 */
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
207 }
208 } else
209 n++;
210 }
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
216
217 s = PyString_AsString(string);
218
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 }
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
241 }
242
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000272 /* %p is ill-defined: ensure leading 0x. */
273 if (s[1] == 'X')
274 s[1] = 'x';
275 else if (s[1] != 'x') {
276 memmove(s+2, s, strlen(s)+1);
277 s[0] = '0';
278 s[1] = 'x';
279 }
Barry Warsawdadace02001-08-24 18:32:06 +0000280 s += strlen(s);
281 break;
282 case '%':
283 *s++ = '%';
284 break;
285 default:
286 strcpy(s, p);
287 s += strlen(s);
288 goto end;
289 }
290 } else
291 *s++ = *f;
292 }
293
294 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000295 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000296 return string;
297}
298
299PyObject *
300PyString_FromFormat(const char *format, ...)
301{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000302 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000303 va_list vargs;
304
305#ifdef HAVE_STDARG_PROTOTYPES
306 va_start(vargs, format);
307#else
308 va_start(vargs);
309#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 ret = PyString_FromFormatV(format, vargs);
311 va_end(vargs);
312 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313}
314
315
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000316PyObject *PyString_Decode(const char *s,
317 int size,
318 const char *encoding,
319 const char *errors)
320{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000321 PyObject *v, *str;
322
323 str = PyString_FromStringAndSize(s, size);
324 if (str == NULL)
325 return NULL;
326 v = PyString_AsDecodedString(str, encoding, errors);
327 Py_DECREF(str);
328 return v;
329}
330
331PyObject *PyString_AsDecodedObject(PyObject *str,
332 const char *encoding,
333 const char *errors)
334{
335 PyObject *v;
336
337 if (!PyString_Check(str)) {
338 PyErr_BadArgument();
339 goto onError;
340 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000341
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000342 if (encoding == NULL) {
343#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000344 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000345#else
346 PyErr_SetString(PyExc_ValueError, "no encoding specified");
347 goto onError;
348#endif
349 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350
351 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000352 v = PyCodec_Decode(str, encoding, errors);
353 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000355
356 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000359 return NULL;
360}
361
362PyObject *PyString_AsDecodedString(PyObject *str,
363 const char *encoding,
364 const char *errors)
365{
366 PyObject *v;
367
368 v = PyString_AsDecodedObject(str, encoding, errors);
369 if (v == NULL)
370 goto onError;
371
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000372#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 /* Convert Unicode to a string using the default encoding */
374 if (PyUnicode_Check(v)) {
375 PyObject *temp = v;
376 v = PyUnicode_AsEncodedString(v, NULL, NULL);
377 Py_DECREF(temp);
378 if (v == NULL)
379 goto onError;
380 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000382 if (!PyString_Check(v)) {
383 PyErr_Format(PyExc_TypeError,
384 "decoder did not return a string object (type=%.400s)",
385 v->ob_type->tp_name);
386 Py_DECREF(v);
387 goto onError;
388 }
389
390 return v;
391
392 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 return NULL;
394}
395
396PyObject *PyString_Encode(const char *s,
397 int size,
398 const char *encoding,
399 const char *errors)
400{
401 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 str = PyString_FromStringAndSize(s, size);
404 if (str == NULL)
405 return NULL;
406 v = PyString_AsEncodedString(str, encoding, errors);
407 Py_DECREF(str);
408 return v;
409}
410
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000412 const char *encoding,
413 const char *errors)
414{
415 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000416
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 if (!PyString_Check(str)) {
418 PyErr_BadArgument();
419 goto onError;
420 }
421
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422 if (encoding == NULL) {
423#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000425#else
426 PyErr_SetString(PyExc_ValueError, "no encoding specified");
427 goto onError;
428#endif
429 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430
431 /* Encode via the codec registry */
432 v = PyCodec_Encode(str, encoding, errors);
433 if (v == NULL)
434 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000435
436 return v;
437
438 onError:
439 return NULL;
440}
441
442PyObject *PyString_AsEncodedString(PyObject *str,
443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v;
447
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000448 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449 if (v == NULL)
450 goto onError;
451
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 /* Convert Unicode to a string using the default encoding */
454 if (PyUnicode_Check(v)) {
455 PyObject *temp = v;
456 v = PyUnicode_AsEncodedString(v, NULL, NULL);
457 Py_DECREF(temp);
458 if (v == NULL)
459 goto onError;
460 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(v)) {
463 PyErr_Format(PyExc_TypeError,
464 "encoder did not return a string object (type=%.400s)",
465 v->ob_type->tp_name);
466 Py_DECREF(v);
467 goto onError;
468 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000469
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000471
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 onError:
473 return NULL;
474}
475
/* Deallocator for string objects: hands the object to PyObject_DEL(). */
static void
string_dealloc(PyObject *op)
{
	PyObject_DEL(op);
}
481
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000482static int
483string_getsize(register PyObject *op)
484{
485 char *s;
486 int len;
487 if (PyString_AsStringAndSize(op, &s, &len))
488 return -1;
489 return len;
490}
491
492static /*const*/ char *
493string_getbuffer(register PyObject *op)
494{
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return NULL;
499 return s;
500}
501
Guido van Rossumd7047b31995-01-02 19:07:15 +0000502int
Fred Drakeba096332000-07-09 07:04:36 +0000503PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000505 if (!PyString_Check(op))
506 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508}
509
510/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000511PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000512{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000513 if (!PyString_Check(op))
514 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516}
517
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518int
519PyString_AsStringAndSize(register PyObject *obj,
520 register char **s,
521 register int *len)
522{
523 if (s == NULL) {
524 PyErr_BadInternalCall();
525 return -1;
526 }
527
528 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000529#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530 if (PyUnicode_Check(obj)) {
531 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
532 if (obj == NULL)
533 return -1;
534 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535 else
536#endif
537 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 PyErr_Format(PyExc_TypeError,
539 "expected string or Unicode object, "
540 "%.200s found", obj->ob_type->tp_name);
541 return -1;
542 }
543 }
544
545 *s = PyString_AS_STRING(obj);
546 if (len != NULL)
547 *len = PyString_GET_SIZE(obj);
548 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
549 PyErr_SetString(PyExc_TypeError,
550 "expected string without null bytes");
551 return -1;
552 }
553 return 0;
554}
555
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556/* Methods */
557
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000560{
561 int i;
562 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000563 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000567 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569
Thomas Wouters7e474022000-07-16 12:04:32 +0000570 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000571 quote = '\'';
572 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
573 quote = '"';
574
575 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576 for (i = 0; i < op->ob_size; i++) {
577 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000578 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000580 else if (c == '\t')
581 fprintf(fp, "\\t");
582 else if (c == '\n')
583 fprintf(fp, "\\n");
584 else if (c == '\r')
585 fprintf(fp, "\\r");
586 else if (c < ' ' || c >= 0x7f)
587 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000589 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593}
594
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000596string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000598 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
599 PyObject *v;
600 if (newsize > INT_MAX) {
601 PyErr_SetString(PyExc_OverflowError,
602 "string is too large to make repr");
603 }
604 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000606 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 }
608 else {
609 register int i;
610 register char c;
611 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000612 int quote;
613
Thomas Wouters7e474022000-07-16 12:04:32 +0000614 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000615 quote = '\'';
616 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
617 quote = '"';
618
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000619 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 for (i = 0; i < op->ob_size; i++) {
622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
631 else if (c < ' ' || c >= 0x7f) {
632 sprintf(p, "\\x%02x", c & 0xff);
633 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 }
635 else
636 *p++ = c;
637 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000638 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000640 _PyString_Resize(
641 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000642 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644}
645
/* str() of a string: return the same object `s` with its reference
   count incremented. */
static PyObject *
string_str(PyObject *s)
{
	Py_INCREF(s);
	return s;
}
652
/* Return the length of the string `a`, i.e. its ob_size field. */
static int
string_length(PyStringObject *a)
{
	return a->ob_size;
}
658
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000660string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661{
662 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000663 register PyStringObject *op;
664 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000665#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000666 if (PyUnicode_Check(bb))
667 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000668#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000669 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000670 "cannot add type \"%.200s\" to string",
671 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672 return NULL;
673 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675 /* Optimize cases with empty left or right operand */
676 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000677 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000678 return bb;
679 }
680 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000681 Py_INCREF(a);
682 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000683 }
684 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000685 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000686 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000687 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000688 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000689 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000690 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000691#ifdef CACHE_HASH
692 op->ob_shash = -1;
693#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000694#ifdef INTERN_STRINGS
695 op->ob_sinterned = NULL;
696#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000697 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
698 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
699 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000700 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701#undef b
702}
703
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000704static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000705string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706{
707 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000708 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000710 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000711 if (n < 0)
712 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000713 /* watch out for overflows: the size can overflow int,
714 * and the # of bytes needed can overflow size_t
715 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000717 if (n && size / n != a->ob_size) {
718 PyErr_SetString(PyExc_OverflowError,
719 "repeated string is too long");
720 return NULL;
721 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000723 Py_INCREF(a);
724 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725 }
Tim Peters8f422462000-09-09 06:13:41 +0000726 nbytes = size * sizeof(char);
727 if (nbytes / sizeof(char) != (size_t)size ||
728 nbytes + sizeof(PyStringObject) <= nbytes) {
729 PyErr_SetString(PyExc_OverflowError,
730 "repeated string is too long");
731 return NULL;
732 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000734 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000735 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000737 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000738#ifdef CACHE_HASH
739 op->ob_shash = -1;
740#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000741#ifdef INTERN_STRINGS
742 op->ob_sinterned = NULL;
743#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000744 for (i = 0; i < size; i += a->ob_size)
745 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
746 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000747 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748}
749
750/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
751
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000753string_slice(register PyStringObject *a, register int i, register int j)
754 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000755{
756 if (i < 0)
757 i = 0;
758 if (j < 0)
759 j = 0; /* Avoid signed/unsigned bug in next line */
760 if (j > a->ob_size)
761 j = a->ob_size;
762 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000763 Py_INCREF(a);
764 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765 }
766 if (j < i)
767 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000768 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769}
770
Guido van Rossum9284a572000-03-07 15:53:43 +0000771static int
Fred Drakeba096332000-07-09 07:04:36 +0000772string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000773{
774 register char *s, *end;
775 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000776#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000777 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000778 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000779#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000780 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000781 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000782 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000783 return -1;
784 }
785 c = PyString_AsString(el)[0];
786 s = PyString_AsString(a);
787 end = s + PyString_Size(a);
788 while (s < end) {
789 if (c == *s++)
790 return 1;
791 }
792 return 0;
793}
794
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000795static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000796string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000798 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000799 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000801 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000802 return NULL;
803 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000804 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000805 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000806 if (v == NULL)
807 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000808 else {
809#ifdef COUNT_ALLOCS
810 one_strings++;
811#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000812 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000813 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000814 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815}
816
Martin v. Löwiscd353062001-05-24 16:56:35 +0000817static PyObject*
818string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000819{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000820 int c;
821 int len_a, len_b;
822 int min_len;
823 PyObject *result;
824
825 /* One of the objects is a string object. Make sure the
826 other one is one, too. */
827 if (a->ob_type != b->ob_type) {
828 result = Py_NotImplemented;
829 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000830 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000831 if (a == b) {
832 switch (op) {
833 case Py_EQ:case Py_LE:case Py_GE:
834 result = Py_True;
835 goto out;
836 case Py_NE:case Py_LT:case Py_GT:
837 result = Py_False;
838 goto out;
839 }
840 }
841 if (op == Py_EQ) {
842 /* Supporting Py_NE here as well does not save
843 much time, since Py_NE is rarely used. */
844 if (a->ob_size == b->ob_size
845 && (a->ob_sval[0] == b->ob_sval[0]
846 && memcmp(a->ob_sval, b->ob_sval,
847 a->ob_size) == 0)) {
848 result = Py_True;
849 } else {
850 result = Py_False;
851 }
852 goto out;
853 }
854 len_a = a->ob_size; len_b = b->ob_size;
855 min_len = (len_a < len_b) ? len_a : len_b;
856 if (min_len > 0) {
857 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
858 if (c==0)
859 c = memcmp(a->ob_sval, b->ob_sval, min_len);
860 }else
861 c = 0;
862 if (c == 0)
863 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
864 switch (op) {
865 case Py_LT: c = c < 0; break;
866 case Py_LE: c = c <= 0; break;
867 case Py_EQ: assert(0); break; /* unreachable */
868 case Py_NE: c = c != 0; break;
869 case Py_GT: c = c > 0; break;
870 case Py_GE: c = c >= 0; break;
871 default:
872 result = Py_NotImplemented;
873 goto out;
874 }
875 result = c ? Py_True : Py_False;
876 out:
877 Py_INCREF(result);
878 return result;
879}
880
881int
882_PyString_Eq(PyObject *o1, PyObject *o2)
883{
884 PyStringObject *a, *b;
885 a = (PyStringObject*)o1;
886 b = (PyStringObject*)o2;
887 return a->ob_size == b->ob_size
888 && *a->ob_sval == *b->ob_sval
889 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890}
891
Guido van Rossum9bfef441993-03-29 10:43:31 +0000892static long
Fred Drakeba096332000-07-09 07:04:36 +0000893string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000894{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000895 register int len;
896 register unsigned char *p;
897 register long x;
898
899#ifdef CACHE_HASH
900 if (a->ob_shash != -1)
901 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000902#ifdef INTERN_STRINGS
903 if (a->ob_sinterned != NULL)
904 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000906#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000907#endif
908 len = a->ob_size;
909 p = (unsigned char *) a->ob_sval;
910 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000911 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000912 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000913 x ^= a->ob_size;
914 if (x == -1)
915 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916#ifdef CACHE_HASH
917 a->ob_shash = x;
918#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919 return x;
920}
921
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000922static int
Fred Drakeba096332000-07-09 07:04:36 +0000923string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000924{
925 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000926 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000927 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000928 return -1;
929 }
930 *ptr = (void *)self->ob_sval;
931 return self->ob_size;
932}
933
934static int
Fred Drakeba096332000-07-09 07:04:36 +0000935string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000936{
Guido van Rossum045e6881997-09-08 18:30:11 +0000937 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000938 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000939 return -1;
940}
941
942static int
Fred Drakeba096332000-07-09 07:04:36 +0000943string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000944{
945 if ( lenp )
946 *lenp = self->ob_size;
947 return 1;
948}
949
Guido van Rossum1db70701998-10-08 02:18:52 +0000950static int
Fred Drakeba096332000-07-09 07:04:36 +0000951string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000952{
953 if ( index != 0 ) {
954 PyErr_SetString(PyExc_SystemError,
955 "accessing non-existent string segment");
956 return -1;
957 }
958 *ptr = self->ob_sval;
959 return self->ob_size;
960}
961
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000963 (inquiry)string_length, /*sq_length*/
964 (binaryfunc)string_concat, /*sq_concat*/
965 (intargfunc)string_repeat, /*sq_repeat*/
966 (intargfunc)string_item, /*sq_item*/
967 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000968 0, /*sq_ass_item*/
969 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000970 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971};
972
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000973static PyBufferProcs string_as_buffer = {
974 (getreadbufferproc)string_buffer_getreadbuf,
975 (getwritebufferproc)string_buffer_getwritebuf,
976 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000977 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000978};
979
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000980
981
/* Values for the striptype argument of do_strip(). */
#define LEFTSTRIP 0     /* strip leading whitespace only */
#define RIGHTSTRIP 1    /* strip trailing whitespace only */
#define BOTHSTRIP 2     /* strip both ends */
985
986
987static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000988split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000989{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000990 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000991 PyObject* item;
992 PyObject *list = PyList_New(0);
993
994 if (list == NULL)
995 return NULL;
996
Guido van Rossum4c08d552000-03-10 22:55:18 +0000997 for (i = j = 0; i < len; ) {
998 while (i < len && isspace(Py_CHARMASK(s[i])))
999 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001000 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001001 while (i < len && !isspace(Py_CHARMASK(s[i])))
1002 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001003 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001004 if (maxsplit-- <= 0)
1005 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1007 if (item == NULL)
1008 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001009 err = PyList_Append(list, item);
1010 Py_DECREF(item);
1011 if (err < 0)
1012 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001013 while (i < len && isspace(Py_CHARMASK(s[i])))
1014 i++;
1015 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016 }
1017 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001018 if (j < len) {
1019 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1020 if (item == NULL)
1021 goto finally;
1022 err = PyList_Append(list, item);
1023 Py_DECREF(item);
1024 if (err < 0)
1025 goto finally;
1026 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001027 return list;
1028 finally:
1029 Py_DECREF(list);
1030 return NULL;
1031}
1032
1033
1034static char split__doc__[] =
1035"S.split([sep [,maxsplit]]) -> list of strings\n\
1036\n\
1037Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001038delimiter string. If maxsplit is given, at most maxsplit\n\
1039splits are done. If sep is not specified, any whitespace string\n\
1040is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041
1042static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001043string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001044{
1045 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001046 int maxsplit = -1;
1047 const char *s = PyString_AS_STRING(self), *sub;
1048 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049
Guido van Rossum4c08d552000-03-10 22:55:18 +00001050 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001051 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001052 if (maxsplit < 0)
1053 maxsplit = INT_MAX;
1054 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001056 if (PyString_Check(subobj)) {
1057 sub = PyString_AS_STRING(subobj);
1058 n = PyString_GET_SIZE(subobj);
1059 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001061 else if (PyUnicode_Check(subobj))
1062 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001063#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001064 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1065 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001066 if (n == 0) {
1067 PyErr_SetString(PyExc_ValueError, "empty separator");
1068 return NULL;
1069 }
1070
1071 list = PyList_New(0);
1072 if (list == NULL)
1073 return NULL;
1074
1075 i = j = 0;
1076 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001077 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001078 if (maxsplit-- <= 0)
1079 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1081 if (item == NULL)
1082 goto fail;
1083 err = PyList_Append(list, item);
1084 Py_DECREF(item);
1085 if (err < 0)
1086 goto fail;
1087 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 }
1089 else
1090 i++;
1091 }
1092 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1093 if (item == NULL)
1094 goto fail;
1095 err = PyList_Append(list, item);
1096 Py_DECREF(item);
1097 if (err < 0)
1098 goto fail;
1099
1100 return list;
1101
1102 fail:
1103 Py_DECREF(list);
1104 return NULL;
1105}
1106
1107
1108static char join__doc__[] =
1109"S.join(sequence) -> string\n\
1110\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001111Return a string which is the concatenation of the strings in the\n\
1112sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113
1114static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001115string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116{
1117 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001118 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001119 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001120 char *p;
1121 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001122 size_t sz = 0;
1123 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001124 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001125
Tim Peters19fe14e2001-01-19 03:03:47 +00001126 seq = PySequence_Fast(orig, "");
1127 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001128 if (PyErr_ExceptionMatches(PyExc_TypeError))
1129 PyErr_Format(PyExc_TypeError,
1130 "sequence expected, %.80s found",
1131 orig->ob_type->tp_name);
1132 return NULL;
1133 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001134
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001135 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001136 if (seqlen == 0) {
1137 Py_DECREF(seq);
1138 return PyString_FromString("");
1139 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001141 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001142 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1143 PyErr_Format(PyExc_TypeError,
1144 "sequence item 0: expected string,"
1145 " %.80s found",
1146 item->ob_type->tp_name);
1147 Py_DECREF(seq);
1148 return NULL;
1149 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001150 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001151 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001152 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001153 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001154
Tim Peters19fe14e2001-01-19 03:03:47 +00001155 /* There are at least two things to join. Do a pre-pass to figure out
1156 * the total amount of space we'll need (sz), see whether any argument
1157 * is absurd, and defer to the Unicode join if appropriate.
1158 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001159 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001160 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001161 item = PySequence_Fast_GET_ITEM(seq, i);
1162 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001163#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001164 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001165 /* Defer to Unicode join.
1166 * CAUTION: There's no gurantee that the
1167 * original sequence can be iterated over
1168 * again, so we must pass seq here.
1169 */
1170 PyObject *result;
1171 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001172 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001173 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001174 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001175#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001176 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001177 "sequence item %i: expected string,"
1178 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001179 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001180 Py_DECREF(seq);
1181 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001182 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001183 sz += PyString_GET_SIZE(item);
1184 if (i != 0)
1185 sz += seplen;
1186 if (sz < old_sz || sz > INT_MAX) {
1187 PyErr_SetString(PyExc_OverflowError,
1188 "join() is too long for a Python string");
1189 Py_DECREF(seq);
1190 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001191 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001192 }
1193
1194 /* Allocate result space. */
1195 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1196 if (res == NULL) {
1197 Py_DECREF(seq);
1198 return NULL;
1199 }
1200
1201 /* Catenate everything. */
1202 p = PyString_AS_STRING(res);
1203 for (i = 0; i < seqlen; ++i) {
1204 size_t n;
1205 item = PySequence_Fast_GET_ITEM(seq, i);
1206 n = PyString_GET_SIZE(item);
1207 memcpy(p, PyString_AS_STRING(item), n);
1208 p += n;
1209 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001210 memcpy(p, sep, seplen);
1211 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001212 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001214
Jeremy Hylton49048292000-07-11 03:28:17 +00001215 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217}
1218
Tim Peters52e155e2001-06-16 05:42:57 +00001219PyObject *
1220_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001221{
Tim Petersa7259592001-06-16 05:11:17 +00001222 assert(sep != NULL && PyString_Check(sep));
1223 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001224 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001225}
1226
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001227static long
Fred Drakeba096332000-07-09 07:04:36 +00001228string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001229{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001230 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231 int len = PyString_GET_SIZE(self);
1232 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001233 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001234
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001235 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001236 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001237 return -2;
1238 if (PyString_Check(subobj)) {
1239 sub = PyString_AS_STRING(subobj);
1240 n = PyString_GET_SIZE(subobj);
1241 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001242#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001243 else if (PyUnicode_Check(subobj))
1244 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001245#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001246 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001247 return -2;
1248
1249 if (last > len)
1250 last = len;
1251 if (last < 0)
1252 last += len;
1253 if (last < 0)
1254 last = 0;
1255 if (i < 0)
1256 i += len;
1257 if (i < 0)
1258 i = 0;
1259
Guido van Rossum4c08d552000-03-10 22:55:18 +00001260 if (dir > 0) {
1261 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001262 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 last -= n;
1264 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001265 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001266 return (long)i;
1267 }
1268 else {
1269 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001270
Guido van Rossum4c08d552000-03-10 22:55:18 +00001271 if (n == 0 && i <= last)
1272 return (long)last;
1273 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001274 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001275 return (long)j;
1276 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001277
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278 return -1;
1279}
1280
1281
1282static char find__doc__[] =
1283"S.find(sub [,start [,end]]) -> int\n\
1284\n\
1285Return the lowest index in S where substring sub is found,\n\
1286such that sub is contained within s[start,end]. Optional\n\
1287arguments start and end are interpreted as in slice notation.\n\
1288\n\
1289Return -1 on failure.";
1290
1291static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001292string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295 if (result == -2)
1296 return NULL;
1297 return PyInt_FromLong(result);
1298}
1299
1300
1301static char index__doc__[] =
1302"S.index(sub [,start [,end]]) -> int\n\
1303\n\
1304Like S.find() but raise ValueError when the substring is not found.";
1305
1306static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001307string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001308{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001309 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 if (result == -2)
1311 return NULL;
1312 if (result == -1) {
1313 PyErr_SetString(PyExc_ValueError,
1314 "substring not found in string.index");
1315 return NULL;
1316 }
1317 return PyInt_FromLong(result);
1318}
1319
1320
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321static char rfind__doc__[] =
1322"S.rfind(sub [,start [,end]]) -> int\n\
1323\n\
1324Return the highest index in S where substring sub is found,\n\
1325such that sub is contained within s[start,end]. Optional\n\
1326arguments start and end are interpreted as in slice notation.\n\
1327\n\
1328Return -1 on failure.";
1329
1330static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001331string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 if (result == -2)
1335 return NULL;
1336 return PyInt_FromLong(result);
1337}
1338
1339
1340static char rindex__doc__[] =
1341"S.rindex(sub [,start [,end]]) -> int\n\
1342\n\
1343Like S.rfind() but raise ValueError when the substring is not found.";
1344
1345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001346string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 if (result == -2)
1350 return NULL;
1351 if (result == -1) {
1352 PyErr_SetString(PyExc_ValueError,
1353 "substring not found in string.rindex");
1354 return NULL;
1355 }
1356 return PyInt_FromLong(result);
1357}
1358
1359
1360static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001361do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362{
1363 char *s = PyString_AS_STRING(self);
1364 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001366 i = 0;
1367 if (striptype != RIGHTSTRIP) {
1368 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1369 i++;
1370 }
1371 }
1372
1373 j = len;
1374 if (striptype != LEFTSTRIP) {
1375 do {
1376 j--;
1377 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1378 j++;
1379 }
1380
1381 if (i == 0 && j == len) {
1382 Py_INCREF(self);
1383 return (PyObject*)self;
1384 }
1385 else
1386 return PyString_FromStringAndSize(s+i, j-i);
1387}
1388
1389
1390static char strip__doc__[] =
1391"S.strip() -> string\n\
1392\n\
1393Return a copy of the string S with leading and trailing\n\
1394whitespace removed.";
1395
1396static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001397string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001399 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400}
1401
1402
1403static char lstrip__doc__[] =
1404"S.lstrip() -> string\n\
1405\n\
1406Return a copy of the string S with leading whitespace removed.";
1407
1408static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001409string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001411 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412}
1413
1414
1415static char rstrip__doc__[] =
1416"S.rstrip() -> string\n\
1417\n\
1418Return a copy of the string S with trailing whitespace removed.";
1419
1420static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001421string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001423 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424}
1425
1426
1427static char lower__doc__[] =
1428"S.lower() -> string\n\
1429\n\
1430Return a copy of the string S converted to lowercase.";
1431
1432static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001433string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434{
1435 char *s = PyString_AS_STRING(self), *s_new;
1436 int i, n = PyString_GET_SIZE(self);
1437 PyObject *new;
1438
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 new = PyString_FromStringAndSize(NULL, n);
1440 if (new == NULL)
1441 return NULL;
1442 s_new = PyString_AsString(new);
1443 for (i = 0; i < n; i++) {
1444 int c = Py_CHARMASK(*s++);
1445 if (isupper(c)) {
1446 *s_new = tolower(c);
1447 } else
1448 *s_new = c;
1449 s_new++;
1450 }
1451 return new;
1452}
1453
1454
1455static char upper__doc__[] =
1456"S.upper() -> string\n\
1457\n\
1458Return a copy of the string S converted to uppercase.";
1459
1460static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001461string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462{
1463 char *s = PyString_AS_STRING(self), *s_new;
1464 int i, n = PyString_GET_SIZE(self);
1465 PyObject *new;
1466
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467 new = PyString_FromStringAndSize(NULL, n);
1468 if (new == NULL)
1469 return NULL;
1470 s_new = PyString_AsString(new);
1471 for (i = 0; i < n; i++) {
1472 int c = Py_CHARMASK(*s++);
1473 if (islower(c)) {
1474 *s_new = toupper(c);
1475 } else
1476 *s_new = c;
1477 s_new++;
1478 }
1479 return new;
1480}
1481
1482
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483static char title__doc__[] =
1484"S.title() -> string\n\
1485\n\
1486Return a titlecased version of S, i.e. words start with uppercase\n\
1487characters, all remaining cased characters have lowercase.";
1488
1489static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001490string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001491{
1492 char *s = PyString_AS_STRING(self), *s_new;
1493 int i, n = PyString_GET_SIZE(self);
1494 int previous_is_cased = 0;
1495 PyObject *new;
1496
Guido van Rossum4c08d552000-03-10 22:55:18 +00001497 new = PyString_FromStringAndSize(NULL, n);
1498 if (new == NULL)
1499 return NULL;
1500 s_new = PyString_AsString(new);
1501 for (i = 0; i < n; i++) {
1502 int c = Py_CHARMASK(*s++);
1503 if (islower(c)) {
1504 if (!previous_is_cased)
1505 c = toupper(c);
1506 previous_is_cased = 1;
1507 } else if (isupper(c)) {
1508 if (previous_is_cased)
1509 c = tolower(c);
1510 previous_is_cased = 1;
1511 } else
1512 previous_is_cased = 0;
1513 *s_new++ = c;
1514 }
1515 return new;
1516}
1517
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518static char capitalize__doc__[] =
1519"S.capitalize() -> string\n\
1520\n\
1521Return a copy of the string S with only its first character\n\
1522capitalized.";
1523
1524static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001525string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526{
1527 char *s = PyString_AS_STRING(self), *s_new;
1528 int i, n = PyString_GET_SIZE(self);
1529 PyObject *new;
1530
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 new = PyString_FromStringAndSize(NULL, n);
1532 if (new == NULL)
1533 return NULL;
1534 s_new = PyString_AsString(new);
1535 if (0 < n) {
1536 int c = Py_CHARMASK(*s++);
1537 if (islower(c))
1538 *s_new = toupper(c);
1539 else
1540 *s_new = c;
1541 s_new++;
1542 }
1543 for (i = 1; i < n; i++) {
1544 int c = Py_CHARMASK(*s++);
1545 if (isupper(c))
1546 *s_new = tolower(c);
1547 else
1548 *s_new = c;
1549 s_new++;
1550 }
1551 return new;
1552}
1553
1554
1555static char count__doc__[] =
1556"S.count(sub[, start[, end]]) -> int\n\
1557\n\
1558Return the number of occurrences of substring sub in string\n\
1559S[start:end]. Optional arguments start and end are\n\
1560interpreted as in slice notation.";
1561
1562static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001563string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566 int len = PyString_GET_SIZE(self), n;
1567 int i = 0, last = INT_MAX;
1568 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570
Guido van Rossumc6821402000-05-08 14:08:05 +00001571 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1572 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001574
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 if (PyString_Check(subobj)) {
1576 sub = PyString_AS_STRING(subobj);
1577 n = PyString_GET_SIZE(subobj);
1578 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001579#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001580 else if (PyUnicode_Check(subobj)) {
1581 int count;
1582 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1583 if (count == -1)
1584 return NULL;
1585 else
1586 return PyInt_FromLong((long) count);
1587 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001588#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001589 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1590 return NULL;
1591
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592 if (last > len)
1593 last = len;
1594 if (last < 0)
1595 last += len;
1596 if (last < 0)
1597 last = 0;
1598 if (i < 0)
1599 i += len;
1600 if (i < 0)
1601 i = 0;
1602 m = last + 1 - n;
1603 if (n == 0)
1604 return PyInt_FromLong((long) (m-i));
1605
1606 r = 0;
1607 while (i < m) {
1608 if (!memcmp(s+i, sub, n)) {
1609 r++;
1610 i += n;
1611 } else {
1612 i++;
1613 }
1614 }
1615 return PyInt_FromLong((long) r);
1616}
1617
1618
1619static char swapcase__doc__[] =
1620"S.swapcase() -> string\n\
1621\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623converted to lowercase and vice versa.";
1624
1625static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001626string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627{
1628 char *s = PyString_AS_STRING(self), *s_new;
1629 int i, n = PyString_GET_SIZE(self);
1630 PyObject *new;
1631
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 new = PyString_FromStringAndSize(NULL, n);
1633 if (new == NULL)
1634 return NULL;
1635 s_new = PyString_AsString(new);
1636 for (i = 0; i < n; i++) {
1637 int c = Py_CHARMASK(*s++);
1638 if (islower(c)) {
1639 *s_new = toupper(c);
1640 }
1641 else if (isupper(c)) {
1642 *s_new = tolower(c);
1643 }
1644 else
1645 *s_new = c;
1646 s_new++;
1647 }
1648 return new;
1649}
1650
1651
/* Docstring for S.translate(). */
static char translate__doc__[] =
"S.translate(table [,deletechars]) -> string\n"
"\n"
"Return a copy of the string S, where all characters occurring\n"
"in the optional argument deletechars are removed, and the\n"
"remaining characters have been mapped through the given\n"
"translation table, which must be a string of length 256.";
1659
1660static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001661string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001663 register char *input, *output;
1664 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665 register int i, c, changed = 0;
1666 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 int inlen, tablen, dellen = 0;
1669 PyObject *result;
1670 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673 if (!PyArg_ParseTuple(args, "O|O:translate",
1674 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001676
1677 if (PyString_Check(tableobj)) {
1678 table1 = PyString_AS_STRING(tableobj);
1679 tablen = PyString_GET_SIZE(tableobj);
1680 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001681#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001682 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001683 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001684 parameter; instead a mapping to None will cause characters
1685 to be deleted. */
1686 if (delobj != NULL) {
1687 PyErr_SetString(PyExc_TypeError,
1688 "deletions are implemented differently for unicode");
1689 return NULL;
1690 }
1691 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1692 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001693#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001696
1697 if (delobj != NULL) {
1698 if (PyString_Check(delobj)) {
1699 del_table = PyString_AS_STRING(delobj);
1700 dellen = PyString_GET_SIZE(delobj);
1701 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001702#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703 else if (PyUnicode_Check(delobj)) {
1704 PyErr_SetString(PyExc_TypeError,
1705 "deletions are implemented differently for unicode");
1706 return NULL;
1707 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001708#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001709 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1710 return NULL;
1711
1712 if (tablen != 256) {
1713 PyErr_SetString(PyExc_ValueError,
1714 "translation table must be 256 characters long");
1715 return NULL;
1716 }
1717 }
1718 else {
1719 del_table = NULL;
1720 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721 }
1722
1723 table = table1;
1724 inlen = PyString_Size(input_obj);
1725 result = PyString_FromStringAndSize((char *)NULL, inlen);
1726 if (result == NULL)
1727 return NULL;
1728 output_start = output = PyString_AsString(result);
1729 input = PyString_AsString(input_obj);
1730
1731 if (dellen == 0) {
1732 /* If no deletions are required, use faster code */
1733 for (i = inlen; --i >= 0; ) {
1734 c = Py_CHARMASK(*input++);
1735 if (Py_CHARMASK((*output++ = table[c])) != c)
1736 changed = 1;
1737 }
1738 if (changed)
1739 return result;
1740 Py_DECREF(result);
1741 Py_INCREF(input_obj);
1742 return input_obj;
1743 }
1744
1745 for (i = 0; i < 256; i++)
1746 trans_table[i] = Py_CHARMASK(table[i]);
1747
1748 for (i = 0; i < dellen; i++)
1749 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1750
1751 for (i = inlen; --i >= 0; ) {
1752 c = Py_CHARMASK(*input++);
1753 if (trans_table[c] != -1)
1754 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1755 continue;
1756 changed = 1;
1757 }
1758 if (!changed) {
1759 Py_DECREF(result);
1760 Py_INCREF(input_obj);
1761 return input_obj;
1762 }
1763 /* Fix the size of the resulting string */
1764 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1765 return NULL;
1766 return result;
1767}
1768
1769
1770/* What follows is used for implementing replace(). Perry Stoll. */
1771
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none.  If PAT is longer than MEM the result is -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest index at which a complete copy of PAT still fits in MEM;
	   negative when PAT is longer than MEM. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Compare the first byte before paying for memcmp(). */
		if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1797
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, searching left
  to right and resuming right after each match:
    mem=1111  and pat=11 gives 2;
    mem=11111 and pat=11 also gives 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		const int where = mymemfind(mem, len, pat, pat_len);

		if (where == -1)
			break;
		/* Skip past the match so the next search cannot overlap it. */
		mem += where + pat_len;
		len -= where + pat_len;
	}
	return hits;
}
1821
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means no limit.

   If STR is empty, PAT is longer than STR, or there are no occurrences
   of PAT in STR (or COUNT is 0), then the original string is returned.
   Otherwise, a new string is allocated here with PyMem_MALLOC and
   returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growing replacements can push the result length past INT_MAX;
	   computing it anyway would overflow and yield an undersized
	   buffer.  Treat that like an allocation failure.  (Shrinking
	   replacements cannot overflow: they remove at most len bytes.) */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
1906
1907
1908static char replace__doc__[] =
1909"S.replace (old, new[, maxsplit]) -> string\n\
1910\n\
1911Return a copy of string S with all occurrences of substring\n\
1912old replaced by new. If the optional argument maxsplit is\n\
1913given, only the first maxsplit occurrences are replaced.";
1914
1915static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001916string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001918 const char *str = PyString_AS_STRING(self), *sub, *repl;
1919 char *new_s;
1920 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1921 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925 if (!PyArg_ParseTuple(args, "OO|i:replace",
1926 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928
1929 if (PyString_Check(subobj)) {
1930 sub = PyString_AS_STRING(subobj);
1931 sub_len = PyString_GET_SIZE(subobj);
1932 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001933#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001934 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001935 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001936 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001937#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001938 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1939 return NULL;
1940
1941 if (PyString_Check(replobj)) {
1942 repl = PyString_AS_STRING(replobj);
1943 repl_len = PyString_GET_SIZE(replobj);
1944 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001945#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001946 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001947 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001949#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001950 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1951 return NULL;
1952
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001953 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001954 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 return NULL;
1956 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001957 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958 if (new_s == NULL) {
1959 PyErr_NoMemory();
1960 return NULL;
1961 }
1962 if (out_len == -1) {
1963 /* we're returning another reference to self */
1964 new = (PyObject*)self;
1965 Py_INCREF(new);
1966 }
1967 else {
1968 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001969 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970 }
1971 return new;
1972}
1973
1974
1975static char startswith__doc__[] =
1976"S.startswith(prefix[, start[, end]]) -> int\n\
1977\n\
1978Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1979optional start, test S beginning at that position. With optional end, stop\n\
1980comparing S at that position.";
1981
1982static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001983string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001985 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001987 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988 int plen;
1989 int start = 0;
1990 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001991 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992
Guido van Rossumc6821402000-05-08 14:08:05 +00001993 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1994 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001995 return NULL;
1996 if (PyString_Check(subobj)) {
1997 prefix = PyString_AS_STRING(subobj);
1998 plen = PyString_GET_SIZE(subobj);
1999 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002000#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002001 else if (PyUnicode_Check(subobj)) {
2002 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002003 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002004 subobj, start, end, -1);
2005 if (rc == -1)
2006 return NULL;
2007 else
2008 return PyInt_FromLong((long) rc);
2009 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002010#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 return NULL;
2013
2014 /* adopt Java semantics for index out of range. it is legal for
2015 * offset to be == plen, but this only returns true if prefix is
2016 * the empty string.
2017 */
2018 if (start < 0 || start+plen > len)
2019 return PyInt_FromLong(0);
2020
2021 if (!memcmp(str+start, prefix, plen)) {
2022 /* did the match end after the specified end? */
2023 if (end < 0)
2024 return PyInt_FromLong(1);
2025 else if (end - start < plen)
2026 return PyInt_FromLong(0);
2027 else
2028 return PyInt_FromLong(1);
2029 }
2030 else return PyInt_FromLong(0);
2031}
2032
2033
2034static char endswith__doc__[] =
2035"S.endswith(suffix[, start[, end]]) -> int\n\
2036\n\
2037Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2038optional start, test S beginning at that position. With optional end, stop\n\
2039comparing S at that position.";
2040
2041static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002042string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002046 const char* suffix;
2047 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 int start = 0;
2049 int end = -1;
2050 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002051 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052
Guido van Rossumc6821402000-05-08 14:08:05 +00002053 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2054 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002055 return NULL;
2056 if (PyString_Check(subobj)) {
2057 suffix = PyString_AS_STRING(subobj);
2058 slen = PyString_GET_SIZE(subobj);
2059 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002060#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002061 else if (PyUnicode_Check(subobj)) {
2062 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002063 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002064 subobj, start, end, +1);
2065 if (rc == -1)
2066 return NULL;
2067 else
2068 return PyInt_FromLong((long) rc);
2069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002070#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072 return NULL;
2073
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075 return PyInt_FromLong(0);
2076
2077 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081 return PyInt_FromLong(1);
2082 else return PyInt_FromLong(0);
2083}
2084
2085
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002086static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002087"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002088\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002089Encodes S using the codec registered for encoding. encoding defaults\n\
2090to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002091handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2092a ValueError. Other possible values are 'ignore' and 'replace'.";
2093
2094static PyObject *
2095string_encode(PyStringObject *self, PyObject *args)
2096{
2097 char *encoding = NULL;
2098 char *errors = NULL;
2099 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2100 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002101 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2102}
2103
2104
2105static char decode__doc__[] =
2106"S.decode([encoding[,errors]]) -> object\n\
2107\n\
2108Decodes S using the codec registered for encoding. encoding defaults\n\
2109to the default encoding. errors may be given to set a different error\n\
2110handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2111a ValueError. Other possible values are 'ignore' and 'replace'.";
2112
2113static PyObject *
2114string_decode(PyStringObject *self, PyObject *args)
2115{
2116 char *encoding = NULL;
2117 char *errors = NULL;
2118 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2119 return NULL;
2120 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002121}
2122
2123
Guido van Rossum4c08d552000-03-10 22:55:18 +00002124static char expandtabs__doc__[] =
2125"S.expandtabs([tabsize]) -> string\n\
2126\n\
2127Return a copy of S where all tab characters are expanded using spaces.\n\
2128If tabsize is not given, a tab size of 8 characters is assumed.";
2129
2130static PyObject*
2131string_expandtabs(PyStringObject *self, PyObject *args)
2132{
2133 const char *e, *p;
2134 char *q;
2135 int i, j;
2136 PyObject *u;
2137 int tabsize = 8;
2138
2139 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2140 return NULL;
2141
Thomas Wouters7e474022000-07-16 12:04:32 +00002142 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002143 i = j = 0;
2144 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2145 for (p = PyString_AS_STRING(self); p < e; p++)
2146 if (*p == '\t') {
2147 if (tabsize > 0)
2148 j += tabsize - (j % tabsize);
2149 }
2150 else {
2151 j++;
2152 if (*p == '\n' || *p == '\r') {
2153 i += j;
2154 j = 0;
2155 }
2156 }
2157
2158 /* Second pass: create output string and fill it */
2159 u = PyString_FromStringAndSize(NULL, i + j);
2160 if (!u)
2161 return NULL;
2162
2163 j = 0;
2164 q = PyString_AS_STRING(u);
2165
2166 for (p = PyString_AS_STRING(self); p < e; p++)
2167 if (*p == '\t') {
2168 if (tabsize > 0) {
2169 i = tabsize - (j % tabsize);
2170 j += i;
2171 while (i--)
2172 *q++ = ' ';
2173 }
2174 }
2175 else {
2176 j++;
2177 *q++ = *p;
2178 if (*p == '\n' || *p == '\r')
2179 j = 0;
2180 }
2181
2182 return u;
2183}
2184
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002185static
2186PyObject *pad(PyStringObject *self,
2187 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 int right,
2189 char fill)
2190{
2191 PyObject *u;
2192
2193 if (left < 0)
2194 left = 0;
2195 if (right < 0)
2196 right = 0;
2197
2198 if (left == 0 && right == 0) {
2199 Py_INCREF(self);
2200 return (PyObject *)self;
2201 }
2202
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002203 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204 left + PyString_GET_SIZE(self) + right);
2205 if (u) {
2206 if (left)
2207 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002208 memcpy(PyString_AS_STRING(u) + left,
2209 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210 PyString_GET_SIZE(self));
2211 if (right)
2212 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2213 fill, right);
2214 }
2215
2216 return u;
2217}
2218
2219static char ljust__doc__[] =
2220"S.ljust(width) -> string\n\
2221\n\
2222Return S left justified in a string of length width. Padding is\n\
2223done using spaces.";
2224
2225static PyObject *
2226string_ljust(PyStringObject *self, PyObject *args)
2227{
2228 int width;
2229 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2230 return NULL;
2231
2232 if (PyString_GET_SIZE(self) >= width) {
2233 Py_INCREF(self);
2234 return (PyObject*) self;
2235 }
2236
2237 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2238}
2239
2240
2241static char rjust__doc__[] =
2242"S.rjust(width) -> string\n\
2243\n\
2244Return S right justified in a string of length width. Padding is\n\
2245done using spaces.";
2246
2247static PyObject *
2248string_rjust(PyStringObject *self, PyObject *args)
2249{
2250 int width;
2251 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2252 return NULL;
2253
2254 if (PyString_GET_SIZE(self) >= width) {
2255 Py_INCREF(self);
2256 return (PyObject*) self;
2257 }
2258
2259 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2260}
2261
2262
2263static char center__doc__[] =
2264"S.center(width) -> string\n\
2265\n\
2266Return S centered in a string of length width. Padding is done\n\
2267using spaces.";
2268
2269static PyObject *
2270string_center(PyStringObject *self, PyObject *args)
2271{
2272 int marg, left;
2273 int width;
2274
2275 if (!PyArg_ParseTuple(args, "i:center", &width))
2276 return NULL;
2277
2278 if (PyString_GET_SIZE(self) >= width) {
2279 Py_INCREF(self);
2280 return (PyObject*) self;
2281 }
2282
2283 marg = width - PyString_GET_SIZE(self);
2284 left = marg / 2 + (marg & width & 1);
2285
2286 return pad(self, left, marg - left, ' ');
2287}
2288
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Left-pad self with '0' up to `width` bytes; a leading '+' or '-' of
   the original is moved in front of the zeros.  Compiled out by the
   surrounding #if 0. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int zeros;
	char *buf;
	PyObject *padded;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - PyString_GET_SIZE(self);

	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[zeros] is the first byte of the original string. */
	buf = PyString_AS_STRING(padded);
	if (buf[zeros] == '+' || buf[zeros] == '-') {
		/* move sign to beginning of string */
		buf[0] = buf[zeros];
		buf[zeros] = '0';
	}

	return padded;
}
#endif
2328
2329static char isspace__doc__[] =
2330"S.isspace() -> int\n\
2331\n\
2332Return 1 if there are only whitespace characters in S,\n\
23330 otherwise.";
2334
2335static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002336string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337{
Fred Drakeba096332000-07-09 07:04:36 +00002338 register const unsigned char *p
2339 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002340 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341
Guido van Rossum4c08d552000-03-10 22:55:18 +00002342 /* Shortcut for single character strings */
2343 if (PyString_GET_SIZE(self) == 1 &&
2344 isspace(*p))
2345 return PyInt_FromLong(1);
2346
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002347 /* Special case for empty strings */
2348 if (PyString_GET_SIZE(self) == 0)
2349 return PyInt_FromLong(0);
2350
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 e = p + PyString_GET_SIZE(self);
2352 for (; p < e; p++) {
2353 if (!isspace(*p))
2354 return PyInt_FromLong(0);
2355 }
2356 return PyInt_FromLong(1);
2357}
2358
2359
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002360static char isalpha__doc__[] =
2361"S.isalpha() -> int\n\
2362\n\
2363Return 1 if all characters in S are alphabetic\n\
2364and there is at least one character in S, 0 otherwise.";
2365
2366static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002367string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002368{
Fred Drakeba096332000-07-09 07:04:36 +00002369 register const unsigned char *p
2370 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002371 register const unsigned char *e;
2372
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002373 /* Shortcut for single character strings */
2374 if (PyString_GET_SIZE(self) == 1 &&
2375 isalpha(*p))
2376 return PyInt_FromLong(1);
2377
2378 /* Special case for empty strings */
2379 if (PyString_GET_SIZE(self) == 0)
2380 return PyInt_FromLong(0);
2381
2382 e = p + PyString_GET_SIZE(self);
2383 for (; p < e; p++) {
2384 if (!isalpha(*p))
2385 return PyInt_FromLong(0);
2386 }
2387 return PyInt_FromLong(1);
2388}
2389
2390
2391static char isalnum__doc__[] =
2392"S.isalnum() -> int\n\
2393\n\
2394Return 1 if all characters in S are alphanumeric\n\
2395and there is at least one character in S, 0 otherwise.";
2396
2397static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002398string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002399{
Fred Drakeba096332000-07-09 07:04:36 +00002400 register const unsigned char *p
2401 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002402 register const unsigned char *e;
2403
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002404 /* Shortcut for single character strings */
2405 if (PyString_GET_SIZE(self) == 1 &&
2406 isalnum(*p))
2407 return PyInt_FromLong(1);
2408
2409 /* Special case for empty strings */
2410 if (PyString_GET_SIZE(self) == 0)
2411 return PyInt_FromLong(0);
2412
2413 e = p + PyString_GET_SIZE(self);
2414 for (; p < e; p++) {
2415 if (!isalnum(*p))
2416 return PyInt_FromLong(0);
2417 }
2418 return PyInt_FromLong(1);
2419}
2420
2421
Guido van Rossum4c08d552000-03-10 22:55:18 +00002422static char isdigit__doc__[] =
2423"S.isdigit() -> int\n\
2424\n\
2425Return 1 if there are only digit characters in S,\n\
24260 otherwise.";
2427
2428static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002429string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002430{
Fred Drakeba096332000-07-09 07:04:36 +00002431 register const unsigned char *p
2432 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002433 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002434
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 /* Shortcut for single character strings */
2436 if (PyString_GET_SIZE(self) == 1 &&
2437 isdigit(*p))
2438 return PyInt_FromLong(1);
2439
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002440 /* Special case for empty strings */
2441 if (PyString_GET_SIZE(self) == 0)
2442 return PyInt_FromLong(0);
2443
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 e = p + PyString_GET_SIZE(self);
2445 for (; p < e; p++) {
2446 if (!isdigit(*p))
2447 return PyInt_FromLong(0);
2448 }
2449 return PyInt_FromLong(1);
2450}
2451
2452
2453static char islower__doc__[] =
2454"S.islower() -> int\n\
2455\n\
2456Return 1 if all cased characters in S are lowercase and there is\n\
2457at least one cased character in S, 0 otherwise.";
2458
2459static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002460string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002461{
Fred Drakeba096332000-07-09 07:04:36 +00002462 register const unsigned char *p
2463 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002464 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 int cased;
2466
Guido van Rossum4c08d552000-03-10 22:55:18 +00002467 /* Shortcut for single character strings */
2468 if (PyString_GET_SIZE(self) == 1)
2469 return PyInt_FromLong(islower(*p) != 0);
2470
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002471 /* Special case for empty strings */
2472 if (PyString_GET_SIZE(self) == 0)
2473 return PyInt_FromLong(0);
2474
Guido van Rossum4c08d552000-03-10 22:55:18 +00002475 e = p + PyString_GET_SIZE(self);
2476 cased = 0;
2477 for (; p < e; p++) {
2478 if (isupper(*p))
2479 return PyInt_FromLong(0);
2480 else if (!cased && islower(*p))
2481 cased = 1;
2482 }
2483 return PyInt_FromLong(cased);
2484}
2485
2486
2487static char isupper__doc__[] =
2488"S.isupper() -> int\n\
2489\n\
2490Return 1 if all cased characters in S are uppercase and there is\n\
2491at least one cased character in S, 0 otherwise.";
2492
2493static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002494string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002495{
Fred Drakeba096332000-07-09 07:04:36 +00002496 register const unsigned char *p
2497 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002498 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002499 int cased;
2500
Guido van Rossum4c08d552000-03-10 22:55:18 +00002501 /* Shortcut for single character strings */
2502 if (PyString_GET_SIZE(self) == 1)
2503 return PyInt_FromLong(isupper(*p) != 0);
2504
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002505 /* Special case for empty strings */
2506 if (PyString_GET_SIZE(self) == 0)
2507 return PyInt_FromLong(0);
2508
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509 e = p + PyString_GET_SIZE(self);
2510 cased = 0;
2511 for (; p < e; p++) {
2512 if (islower(*p))
2513 return PyInt_FromLong(0);
2514 else if (!cased && isupper(*p))
2515 cased = 1;
2516 }
2517 return PyInt_FromLong(cased);
2518}
2519
2520
2521static char istitle__doc__[] =
2522"S.istitle() -> int\n\
2523\n\
2524Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2525may only follow uncased characters and lowercase characters only cased\n\
2526ones. Return 0 otherwise.";
2527
2528static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002529string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530{
Fred Drakeba096332000-07-09 07:04:36 +00002531 register const unsigned char *p
2532 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002533 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534 int cased, previous_is_cased;
2535
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536 /* Shortcut for single character strings */
2537 if (PyString_GET_SIZE(self) == 1)
2538 return PyInt_FromLong(isupper(*p) != 0);
2539
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002540 /* Special case for empty strings */
2541 if (PyString_GET_SIZE(self) == 0)
2542 return PyInt_FromLong(0);
2543
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544 e = p + PyString_GET_SIZE(self);
2545 cased = 0;
2546 previous_is_cased = 0;
2547 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002548 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002549
2550 if (isupper(ch)) {
2551 if (previous_is_cased)
2552 return PyInt_FromLong(0);
2553 previous_is_cased = 1;
2554 cased = 1;
2555 }
2556 else if (islower(ch)) {
2557 if (!previous_is_cased)
2558 return PyInt_FromLong(0);
2559 previous_is_cased = 1;
2560 cased = 1;
2561 }
2562 else
2563 previous_is_cased = 0;
2564 }
2565 return PyInt_FromLong(cased);
2566}
2567
2568
/* Docstring for S.splitlines(); the optional argument is shown in
   balanced square brackets. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002575
2576#define SPLIT_APPEND(data, left, right) \
2577 str = PyString_FromStringAndSize(data + left, right - left); \
2578 if (!str) \
2579 goto onError; \
2580 if (PyList_Append(list, str)) { \
2581 Py_DECREF(str); \
2582 goto onError; \
2583 } \
2584 else \
2585 Py_DECREF(str);
2586
2587static PyObject*
2588string_splitlines(PyStringObject *self, PyObject *args)
2589{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 register int i;
2591 register int j;
2592 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002593 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 PyObject *list;
2595 PyObject *str;
2596 char *data;
2597
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002598 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 return NULL;
2600
2601 data = PyString_AS_STRING(self);
2602 len = PyString_GET_SIZE(self);
2603
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 list = PyList_New(0);
2605 if (!list)
2606 goto onError;
2607
2608 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002609 int eol;
2610
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 /* Find a line and append it */
2612 while (i < len && data[i] != '\n' && data[i] != '\r')
2613 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614
2615 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002616 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617 if (i < len) {
2618 if (data[i] == '\r' && i + 1 < len &&
2619 data[i+1] == '\n')
2620 i += 2;
2621 else
2622 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002623 if (keepends)
2624 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002625 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002626 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002627 j = i;
2628 }
2629 if (j < len) {
2630 SPLIT_APPEND(data, j, len);
2631 }
2632
2633 return list;
2634
2635 onError:
2636 Py_DECREF(list);
2637 return NULL;
2638}
2639
2640#undef SPLIT_APPEND
2641
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002643static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002644string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002645 /* Counterparts of the obsolete stropmodule functions; except
2646 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002647 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2648 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2649 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2650 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2651 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2652 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2653 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2654 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2655 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2656 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2657 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2658 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2659 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2660 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2661 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2662 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2663 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2664 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2665 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2666 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2667 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2668 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2669 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2670 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2671 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2672 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2673 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2674 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2675 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2676 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2677 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2678 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2679 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002680#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002681 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002682#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002683 {NULL, NULL} /* sentinel */
2684};
2685
2686static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002687string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002688{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002689 PyObject *x = NULL;
2690 static char *kwlist[] = {"object", 0};
2691
2692 assert(type == &PyString_Type);
2693 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2694 return NULL;
2695 if (x == NULL)
2696 return PyString_FromString("");
2697 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002698}
2699
/* Docstring of the str type (installed as tp_doc). */
static char string_doc[] =
	"str(object) -> string\n"
	"\n"
	"Return a nice string representation of the object.\n"
	"If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002705
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002706PyTypeObject PyString_Type = {
2707 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002708 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002709 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002710 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002711 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002712 (destructor)string_dealloc, /* tp_dealloc */
2713 (printfunc)string_print, /* tp_print */
2714 0, /* tp_getattr */
2715 0, /* tp_setattr */
2716 0, /* tp_compare */
2717 (reprfunc)string_repr, /* tp_repr */
2718 0, /* tp_as_number */
2719 &string_as_sequence, /* tp_as_sequence */
2720 0, /* tp_as_mapping */
2721 (hashfunc)string_hash, /* tp_hash */
2722 0, /* tp_call */
2723 (reprfunc)string_str, /* tp_str */
2724 PyObject_GenericGetAttr, /* tp_getattro */
2725 0, /* tp_setattro */
2726 &string_as_buffer, /* tp_as_buffer */
2727 Py_TPFLAGS_DEFAULT, /* tp_flags */
2728 string_doc, /* tp_doc */
2729 0, /* tp_traverse */
2730 0, /* tp_clear */
2731 (richcmpfunc)string_richcompare, /* tp_richcompare */
2732 0, /* tp_weaklistoffset */
2733 0, /* tp_iter */
2734 0, /* tp_iternext */
2735 string_methods, /* tp_methods */
2736 0, /* tp_members */
2737 0, /* tp_getset */
2738 0, /* tp_base */
2739 0, /* tp_dict */
2740 0, /* tp_descr_get */
2741 0, /* tp_descr_set */
2742 0, /* tp_dictoffset */
2743 0, /* tp_init */
2744 0, /* tp_alloc */
2745 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002746};
2747
2748void
Fred Drakeba096332000-07-09 07:04:36 +00002749PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002750{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002751 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002752 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002753 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002754 if (w == NULL || !PyString_Check(*pv)) {
2755 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002756 *pv = NULL;
2757 return;
2758 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002759 v = string_concat((PyStringObject *) *pv, w);
2760 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002761 *pv = v;
2762}
2763
Guido van Rossum013142a1994-08-30 08:19:36 +00002764void
Fred Drakeba096332000-07-09 07:04:36 +00002765PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002766{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002767 PyString_Concat(pv, w);
2768 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002769}
2770
2771
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002772/* The following function breaks the notion that strings are immutable:
2773 it changes the size of a string. We get away with this only if there
2774 is only one module referencing the object. You can also think of it
2775 as creating a new string object and destroying the old one, only
2776 more efficiently. In any case, don't use this if the string may
2777 already be known to some other part of the code... */
2778
2779int
Fred Drakeba096332000-07-09 07:04:36 +00002780_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002781{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002782 register PyObject *v;
2783 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002784 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002785 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002786 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002787 Py_DECREF(v);
2788 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002789 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002790 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002791 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002792#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002793 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002794#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 _Py_ForgetReference(v);
2796 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002797 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002799 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002800 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002801 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002802 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002804 _Py_NewReference(*pv);
2805 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002806 sv->ob_size = newsize;
2807 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002808 return 0;
2809}
Guido van Rossume5372401993-03-16 12:15:04 +00002810
2811/* Helpers for formatstring */
2812
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002813static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002814getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002815{
2816 int argidx = *p_argidx;
2817 if (argidx < arglen) {
2818 (*p_argidx)++;
2819 if (arglen < 0)
2820 return args;
2821 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002823 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 PyErr_SetString(PyExc_TypeError,
2825 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002826 return NULL;
2827}
2828
/* Bit flags recording which flag characters appeared in a format
 * specifier:
 *
 *   flag      character
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
2841
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002842static int
Fred Drakeba096332000-07-09 07:04:36 +00002843formatfloat(char *buf, size_t buflen, int flags,
2844 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002845{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002846 /* fmt = '%#.' + `prec` + `type`
2847 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002848 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002849 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002850 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002851 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002852 if (prec < 0)
2853 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002854 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2855 type = 'g';
2856 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002857 /* worst case length calc to ensure no buffer overrun:
2858 fmt = %#.<prec>g
2859 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002860 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002861 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2862 If prec=0 the effective precision is 1 (the leading digit is
2863 always given), therefore increase by one to 10+prec. */
2864 if (buflen <= (size_t)10 + (size_t)prec) {
2865 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002866 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002867 return -1;
2868 }
Guido van Rossume5372401993-03-16 12:15:04 +00002869 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002870 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002871}
2872
Tim Peters38fd5b62000-09-21 05:43:11 +00002873/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2874 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2875 * Python's regular ints.
2876 * Return value: a new PyString*, or NULL if error.
2877 * . *pbuf is set to point into it,
2878 * *plen set to the # of chars following that.
2879 * Caller must decref it when done using pbuf.
2880 * The string starting at *pbuf is of the form
2881 * "-"? ("0x" | "0X")? digit+
2882 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002883 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002884 * There will be at least prec digits, zero-filled on the left if
2885 * necessary to get that many.
2886 * val object to be converted
2887 * flags bitmask of format flags; only F_ALT is looked at
2888 * prec minimum number of digits; 0-fill on left if needed
2889 * type a character in [duoxX]; u acts the same as d
2890 *
2891 * CAUTION: o, x and X conversions on regular ints can never
2892 * produce a '-' sign, but can for Python's unbounded ints.
2893 */
2894PyObject*
2895_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2896 char **pbuf, int *plen)
2897{
2898 PyObject *result = NULL;
2899 char *buf;
2900 int i;
2901 int sign; /* 1 if '-', else 0 */
2902 int len; /* number of characters */
2903 int numdigits; /* len == numnondigits + numdigits */
2904 int numnondigits = 0;
2905
2906 switch (type) {
2907 case 'd':
2908 case 'u':
2909 result = val->ob_type->tp_str(val);
2910 break;
2911 case 'o':
2912 result = val->ob_type->tp_as_number->nb_oct(val);
2913 break;
2914 case 'x':
2915 case 'X':
2916 numnondigits = 2;
2917 result = val->ob_type->tp_as_number->nb_hex(val);
2918 break;
2919 default:
2920 assert(!"'type' not in [duoxX]");
2921 }
2922 if (!result)
2923 return NULL;
2924
2925 /* To modify the string in-place, there can only be one reference. */
2926 if (result->ob_refcnt != 1) {
2927 PyErr_BadInternalCall();
2928 return NULL;
2929 }
2930 buf = PyString_AsString(result);
2931 len = PyString_Size(result);
2932 if (buf[len-1] == 'L') {
2933 --len;
2934 buf[len] = '\0';
2935 }
2936 sign = buf[0] == '-';
2937 numnondigits += sign;
2938 numdigits = len - numnondigits;
2939 assert(numdigits > 0);
2940
Tim Petersfff53252001-04-12 18:38:48 +00002941 /* Get rid of base marker unless F_ALT */
2942 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002943 /* Need to skip 0x, 0X or 0. */
2944 int skipped = 0;
2945 switch (type) {
2946 case 'o':
2947 assert(buf[sign] == '0');
2948 /* If 0 is only digit, leave it alone. */
2949 if (numdigits > 1) {
2950 skipped = 1;
2951 --numdigits;
2952 }
2953 break;
2954 case 'x':
2955 case 'X':
2956 assert(buf[sign] == '0');
2957 assert(buf[sign + 1] == 'x');
2958 skipped = 2;
2959 numnondigits -= 2;
2960 break;
2961 }
2962 if (skipped) {
2963 buf += skipped;
2964 len -= skipped;
2965 if (sign)
2966 buf[0] = '-';
2967 }
2968 assert(len == numnondigits + numdigits);
2969 assert(numdigits > 0);
2970 }
2971
2972 /* Fill with leading zeroes to meet minimum width. */
2973 if (prec > numdigits) {
2974 PyObject *r1 = PyString_FromStringAndSize(NULL,
2975 numnondigits + prec);
2976 char *b1;
2977 if (!r1) {
2978 Py_DECREF(result);
2979 return NULL;
2980 }
2981 b1 = PyString_AS_STRING(r1);
2982 for (i = 0; i < numnondigits; ++i)
2983 *b1++ = *buf++;
2984 for (i = 0; i < prec - numdigits; i++)
2985 *b1++ = '0';
2986 for (i = 0; i < numdigits; i++)
2987 *b1++ = *buf++;
2988 *b1 = '\0';
2989 Py_DECREF(result);
2990 result = r1;
2991 buf = PyString_AS_STRING(result);
2992 len = numnondigits + prec;
2993 }
2994
2995 /* Fix up case for hex conversions. */
2996 switch (type) {
2997 case 'x':
2998 /* Need to convert all upper case letters to lower case. */
2999 for (i = 0; i < len; i++)
3000 if (buf[i] >= 'A' && buf[i] <= 'F')
3001 buf[i] += 'a'-'A';
3002 break;
3003 case 'X':
3004 /* Need to convert 0x to 0X (and -0x to -0X). */
3005 if (buf[sign + 1] == 'x')
3006 buf[sign + 1] = 'X';
3007 break;
3008 }
3009 *pbuf = buf;
3010 *plen = len;
3011 return result;
3012}
3013
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003014static int
Fred Drakeba096332000-07-09 07:04:36 +00003015formatint(char *buf, size_t buflen, int flags,
3016 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003017{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003018 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003019 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3020 + 1 + 1 = 24 */
3021 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003022 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003023 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003024 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003025 if (prec < 0)
3026 prec = 1;
3027 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003028 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003029 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003030 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003031 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003032 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003033 return -1;
3034 }
Guido van Rossume5372401993-03-16 12:15:04 +00003035 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003036 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3037 * but we want it (for consistency with other %#x conversions, and
3038 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003039 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3040 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3041 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003042 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003043 if (x == 0 &&
3044 (flags & F_ALT) &&
3045 (type == 'x' || type == 'X') &&
3046 buf[1] != (char)type) /* this last always true under std C */
3047 {
Tim Petersfff53252001-04-12 18:38:48 +00003048 memmove(buf+2, buf, strlen(buf) + 1);
3049 buf[0] = '0';
3050 buf[1] = (char)type;
3051 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003052 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003053}
3054
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003055static int
Fred Drakeba096332000-07-09 07:04:36 +00003056formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003057{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003058 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003059 if (PyString_Check(v)) {
3060 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003061 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003062 }
3063 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003064 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003065 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003066 }
3067 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003068 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003069}
3070
Guido van Rossum013142a1994-08-30 08:19:36 +00003071
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro can be used safely
   inside any larger expression.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003081
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003082PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003083PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003084{
3085 char *fmt, *res;
3086 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003087 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003088 PyObject *result, *orig_args;
3089#ifdef Py_USING_UNICODE
3090 PyObject *v, *w;
3091#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003092 PyObject *dict = NULL;
3093 if (format == NULL || !PyString_Check(format) || args == NULL) {
3094 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003095 return NULL;
3096 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003097 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003098 fmt = PyString_AsString(format);
3099 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003100 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003101 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003102 if (result == NULL)
3103 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003104 res = PyString_AsString(result);
3105 if (PyTuple_Check(args)) {
3106 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003107 argidx = 0;
3108 }
3109 else {
3110 arglen = -1;
3111 argidx = -2;
3112 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003113 if (args->ob_type->tp_as_mapping)
3114 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003115 while (--fmtcnt >= 0) {
3116 if (*fmt != '%') {
3117 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003118 rescnt = fmtcnt + 100;
3119 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003120 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003121 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003122 res = PyString_AsString(result)
3123 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003124 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003125 }
3126 *res++ = *fmt++;
3127 }
3128 else {
3129 /* Got a format specifier */
3130 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003131 int width = -1;
3132 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003133 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003134 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003135 PyObject *v = NULL;
3136 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003137 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003138 int sign;
3139 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003140 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003141#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003142 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003143 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003144#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003145
Guido van Rossumda9c2711996-12-05 21:58:58 +00003146 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003147 if (*fmt == '(') {
3148 char *keystart;
3149 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003150 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003151 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003152
3153 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003154 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003155 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003156 goto error;
3157 }
3158 ++fmt;
3159 --fmtcnt;
3160 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003161 /* Skip over balanced parentheses */
3162 while (pcount > 0 && --fmtcnt >= 0) {
3163 if (*fmt == ')')
3164 --pcount;
3165 else if (*fmt == '(')
3166 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003167 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003168 }
3169 keylen = fmt - keystart - 1;
3170 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003171 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003172 "incomplete format key");
3173 goto error;
3174 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003175 key = PyString_FromStringAndSize(keystart,
3176 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003177 if (key == NULL)
3178 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003179 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003180 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003181 args_owned = 0;
3182 }
3183 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003184 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003185 if (args == NULL) {
3186 goto error;
3187 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003188 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003189 arglen = -1;
3190 argidx = -2;
3191 }
Guido van Rossume5372401993-03-16 12:15:04 +00003192 while (--fmtcnt >= 0) {
3193 switch (c = *fmt++) {
3194 case '-': flags |= F_LJUST; continue;
3195 case '+': flags |= F_SIGN; continue;
3196 case ' ': flags |= F_BLANK; continue;
3197 case '#': flags |= F_ALT; continue;
3198 case '0': flags |= F_ZERO; continue;
3199 }
3200 break;
3201 }
3202 if (c == '*') {
3203 v = getnextarg(args, arglen, &argidx);
3204 if (v == NULL)
3205 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003206 if (!PyInt_Check(v)) {
3207 PyErr_SetString(PyExc_TypeError,
3208 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003209 goto error;
3210 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003211 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003212 if (width < 0) {
3213 flags |= F_LJUST;
3214 width = -width;
3215 }
Guido van Rossume5372401993-03-16 12:15:04 +00003216 if (--fmtcnt >= 0)
3217 c = *fmt++;
3218 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003219 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003220 width = c - '0';
3221 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003222 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003223 if (!isdigit(c))
3224 break;
3225 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003226 PyErr_SetString(
3227 PyExc_ValueError,
3228 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003229 goto error;
3230 }
3231 width = width*10 + (c - '0');
3232 }
3233 }
3234 if (c == '.') {
3235 prec = 0;
3236 if (--fmtcnt >= 0)
3237 c = *fmt++;
3238 if (c == '*') {
3239 v = getnextarg(args, arglen, &argidx);
3240 if (v == NULL)
3241 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003242 if (!PyInt_Check(v)) {
3243 PyErr_SetString(
3244 PyExc_TypeError,
3245 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003246 goto error;
3247 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003248 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003249 if (prec < 0)
3250 prec = 0;
3251 if (--fmtcnt >= 0)
3252 c = *fmt++;
3253 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003254 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003255 prec = c - '0';
3256 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003257 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003258 if (!isdigit(c))
3259 break;
3260 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003261 PyErr_SetString(
3262 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003263 "prec too big");
3264 goto error;
3265 }
3266 prec = prec*10 + (c - '0');
3267 }
3268 }
3269 } /* prec */
3270 if (fmtcnt >= 0) {
3271 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003272 if (--fmtcnt >= 0)
3273 c = *fmt++;
3274 }
3275 }
3276 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003277 PyErr_SetString(PyExc_ValueError,
3278 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003279 goto error;
3280 }
3281 if (c != '%') {
3282 v = getnextarg(args, arglen, &argidx);
3283 if (v == NULL)
3284 goto error;
3285 }
3286 sign = 0;
3287 fill = ' ';
3288 switch (c) {
3289 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003290 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003291 len = 1;
3292 break;
3293 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003294 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003295#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003296 if (PyUnicode_Check(v)) {
3297 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003298 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003299 goto unicode;
3300 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003301#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003302 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003304 else
3305 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003306 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003307 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003308 if (!PyString_Check(temp)) {
3309 PyErr_SetString(PyExc_TypeError,
3310 "%s argument has non-string str()");
3311 goto error;
3312 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003313 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003314 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003315 if (prec >= 0 && len > prec)
3316 len = prec;
3317 break;
3318 case 'i':
3319 case 'd':
3320 case 'u':
3321 case 'o':
3322 case 'x':
3323 case 'X':
3324 if (c == 'i')
3325 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003326 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003327 temp = _PyString_FormatLong(v, flags,
3328 prec, c, &pbuf, &len);
3329 if (!temp)
3330 goto error;
3331 /* unbounded ints can always produce
3332 a sign character! */
3333 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003334 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003335 else {
3336 pbuf = formatbuf;
3337 len = formatint(pbuf, sizeof(formatbuf),
3338 flags, prec, c, v);
3339 if (len < 0)
3340 goto error;
3341 /* only d conversion is signed */
3342 sign = c == 'd';
3343 }
3344 if (flags & F_ZERO)
3345 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003346 break;
3347 case 'e':
3348 case 'E':
3349 case 'f':
3350 case 'g':
3351 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003352 pbuf = formatbuf;
3353 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003354 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003355 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003356 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003357 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003358 fill = '0';
3359 break;
3360 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003361 pbuf = formatbuf;
3362 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003363 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003364 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003365 break;
3366 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003367 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003368 "unsupported format character '%c' (0x%x) "
3369 "at index %i",
3370 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003371 goto error;
3372 }
3373 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003374 if (*pbuf == '-' || *pbuf == '+') {
3375 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003376 len--;
3377 }
3378 else if (flags & F_SIGN)
3379 sign = '+';
3380 else if (flags & F_BLANK)
3381 sign = ' ';
3382 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003383 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003384 }
3385 if (width < len)
3386 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003387 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003388 reslen -= rescnt;
3389 rescnt = width + fmtcnt + 100;
3390 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003391 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003392 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003393 res = PyString_AsString(result)
3394 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003395 }
3396 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003397 if (fill != ' ')
3398 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003399 rescnt--;
3400 if (width > len)
3401 width--;
3402 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003403 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3404 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003405 assert(pbuf[1] == c);
3406 if (fill != ' ') {
3407 *res++ = *pbuf++;
3408 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003409 }
Tim Petersfff53252001-04-12 18:38:48 +00003410 rescnt -= 2;
3411 width -= 2;
3412 if (width < 0)
3413 width = 0;
3414 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003415 }
3416 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003417 do {
3418 --rescnt;
3419 *res++ = fill;
3420 } while (--width > len);
3421 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003422 if (fill == ' ') {
3423 if (sign)
3424 *res++ = sign;
3425 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003426 (c == 'x' || c == 'X')) {
3427 assert(pbuf[0] == '0');
3428 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003429 *res++ = *pbuf++;
3430 *res++ = *pbuf++;
3431 }
3432 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003433 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003434 res += len;
3435 rescnt -= len;
3436 while (--width >= len) {
3437 --rescnt;
3438 *res++ = ' ';
3439 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003440 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003441 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003442 "not all arguments converted");
3443 goto error;
3444 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003446 } /* '%' */
3447 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003448 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003449 PyErr_SetString(PyExc_TypeError,
3450 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003451 goto error;
3452 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003453 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003454 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003455 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003456 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003457 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003458
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003459#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003460 unicode:
3461 if (args_owned) {
3462 Py_DECREF(args);
3463 args_owned = 0;
3464 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003465 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003466 if (PyTuple_Check(orig_args) && argidx > 0) {
3467 PyObject *v;
3468 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3469 v = PyTuple_New(n);
3470 if (v == NULL)
3471 goto error;
3472 while (--n >= 0) {
3473 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3474 Py_INCREF(w);
3475 PyTuple_SET_ITEM(v, n, w);
3476 }
3477 args = v;
3478 } else {
3479 Py_INCREF(orig_args);
3480 args = orig_args;
3481 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003482 args_owned = 1;
3483 /* Take what we have of the result and let the Unicode formatting
3484 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003485 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003486 if (_PyString_Resize(&result, rescnt))
3487 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003488 fmtcnt = PyString_GET_SIZE(format) - \
3489 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003490 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3491 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003492 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003493 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003494 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003495 if (v == NULL)
3496 goto error;
3497 /* Paste what we have (result) to what the Unicode formatting
3498 function returned (v) and return the result (or error) */
3499 w = PyUnicode_Concat(result, v);
3500 Py_DECREF(result);
3501 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003502 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003503 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003504#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003505
Guido van Rossume5372401993-03-16 12:15:04 +00003506 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003507 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003508 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003510 }
Guido van Rossume5372401993-03-16 12:15:04 +00003511 return NULL;
3512}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003513
3514
3515#ifdef INTERN_STRINGS
3516
Barry Warsaw4df762f2000-08-16 23:41:01 +00003517/* This dictionary will leak at PyString_Fini() time. That's acceptable
3518 * because PyString_Fini() specifically frees interned strings that are
3519 * only referenced by this dictionary. The CVS log entry for revision 2.45
3520 * says:
3521 *
3522 * Change the Fini function to only remove otherwise unreferenced
3523 * strings from the interned table. There are references in
3524 * hard-to-find static variables all over the interpreter, and it's not
3525 * worth trying to get rid of all those; but "uninterning" isn't fair
3526 * either and may cause subtle failures later -- so we have to keep them
3527 * in the interned table.
3528 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003529static PyObject *interned;
3530
3531void
Fred Drakeba096332000-07-09 07:04:36 +00003532PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003533{
3534 register PyStringObject *s = (PyStringObject *)(*p);
3535 PyObject *t;
3536 if (s == NULL || !PyString_Check(s))
3537 Py_FatalError("PyString_InternInPlace: strings only please!");
3538 if ((t = s->ob_sinterned) != NULL) {
3539 if (t == (PyObject *)s)
3540 return;
3541 Py_INCREF(t);
3542 *p = t;
3543 Py_DECREF(s);
3544 return;
3545 }
3546 if (interned == NULL) {
3547 interned = PyDict_New();
3548 if (interned == NULL)
3549 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003550 }
3551 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3552 Py_INCREF(t);
3553 *p = s->ob_sinterned = t;
3554 Py_DECREF(s);
3555 return;
3556 }
3557 t = (PyObject *)s;
3558 if (PyDict_SetItem(interned, t, t) == 0) {
3559 s->ob_sinterned = t;
3560 return;
3561 }
3562 PyErr_Clear();
3563}
3564
3565
3566PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003567PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003568{
3569 PyObject *s = PyString_FromString(cp);
3570 if (s == NULL)
3571 return NULL;
3572 PyString_InternInPlace(&s);
3573 return s;
3574}
3575
3576#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003577
3578void
Fred Drakeba096332000-07-09 07:04:36 +00003579PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003580{
3581 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003582 for (i = 0; i < UCHAR_MAX + 1; i++) {
3583 Py_XDECREF(characters[i]);
3584 characters[i] = NULL;
3585 }
3586#ifndef DONT_SHARE_SHORT_STRINGS
3587 Py_XDECREF(nullstring);
3588 nullstring = NULL;
3589#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003590#ifdef INTERN_STRINGS
3591 if (interned) {
3592 int pos, changed;
3593 PyObject *key, *value;
3594 do {
3595 changed = 0;
3596 pos = 0;
3597 while (PyDict_Next(interned, &pos, &key, &value)) {
3598 if (key->ob_refcnt == 2 && key == value) {
3599 PyDict_DelItem(interned, key);
3600 changed = 1;
3601 }
3602 }
3603 } while (changed);
3604 }
3605#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003606}
Barry Warsawa903ad982001-02-23 16:40:48 +00003607
3608#ifdef INTERN_STRINGS
3609void _Py_ReleaseInternedStrings(void)
3610{
3611 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003612 fprintf(stderr, "releasing interned strings\n");
3613 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003614 Py_DECREF(interned);
3615 interned = NULL;
3616 }
3617}
3618#endif /* INTERN_STRINGS */