blob: 3acc69f9b099025c03d8e4d3acdd81c188e3e03d [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
22 Newsizedstringobject() and newstringobject() try in certain cases
23 to share string objects. When the size of the string is zero,
24 these routines always return a pointer to the same string object;
25 when the size is one, they return a pointer to an already existing
26 object if the contents of the string is known. For
27 newstringobject() this is always the case, for
28 newsizedstringobject() this is the case when the first argument is
29 not NULL.
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the object was gotten from
33 newsizedstringobject() with a NULL first argument, because in the
34 future these routines may try to do even more sharing of objects.
35*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
164 ;
165
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
172
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
195 */
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
207 }
208 } else
209 n++;
210 }
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
216
217 s = PyString_AsString(string);
218
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 }
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
241 }
242
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
272 s += strlen(s);
273 break;
274 case '%':
275 *s++ = '%';
276 break;
277 default:
278 strcpy(s, p);
279 s += strlen(s);
280 goto end;
281 }
282 } else
283 *s++ = *f;
284 }
285
286 end:
287 _PyString_Resize(&string, s - PyString_AsString(string));
288 return string;
289}
290
291PyObject *
292PyString_FromFormat(const char *format, ...)
293{
294 va_list vargs;
295
296#ifdef HAVE_STDARG_PROTOTYPES
297 va_start(vargs, format);
298#else
299 va_start(vargs);
300#endif
301 return PyString_FromFormatV(format, vargs);
302}
303
304
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000305PyObject *PyString_Decode(const char *s,
306 int size,
307 const char *encoding,
308 const char *errors)
309{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000310 PyObject *v, *str;
311
312 str = PyString_FromStringAndSize(s, size);
313 if (str == NULL)
314 return NULL;
315 v = PyString_AsDecodedString(str, encoding, errors);
316 Py_DECREF(str);
317 return v;
318}
319
320PyObject *PyString_AsDecodedObject(PyObject *str,
321 const char *encoding,
322 const char *errors)
323{
324 PyObject *v;
325
326 if (!PyString_Check(str)) {
327 PyErr_BadArgument();
328 goto onError;
329 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000330
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000331 if (encoding == NULL) {
332#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000333 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000334#else
335 PyErr_SetString(PyExc_ValueError, "no encoding specified");
336 goto onError;
337#endif
338 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000339
340 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000341 v = PyCodec_Decode(str, encoding, errors);
342 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000343 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000344
345 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000346
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000347 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000348 return NULL;
349}
350
351PyObject *PyString_AsDecodedString(PyObject *str,
352 const char *encoding,
353 const char *errors)
354{
355 PyObject *v;
356
357 v = PyString_AsDecodedObject(str, encoding, errors);
358 if (v == NULL)
359 goto onError;
360
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000361#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000362 /* Convert Unicode to a string using the default encoding */
363 if (PyUnicode_Check(v)) {
364 PyObject *temp = v;
365 v = PyUnicode_AsEncodedString(v, NULL, NULL);
366 Py_DECREF(temp);
367 if (v == NULL)
368 goto onError;
369 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000370#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000371 if (!PyString_Check(v)) {
372 PyErr_Format(PyExc_TypeError,
373 "decoder did not return a string object (type=%.400s)",
374 v->ob_type->tp_name);
375 Py_DECREF(v);
376 goto onError;
377 }
378
379 return v;
380
381 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 return NULL;
383}
384
385PyObject *PyString_Encode(const char *s,
386 int size,
387 const char *encoding,
388 const char *errors)
389{
390 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 str = PyString_FromStringAndSize(s, size);
393 if (str == NULL)
394 return NULL;
395 v = PyString_AsEncodedString(str, encoding, errors);
396 Py_DECREF(str);
397 return v;
398}
399
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000400PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000401 const char *encoding,
402 const char *errors)
403{
404 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000405
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000406 if (!PyString_Check(str)) {
407 PyErr_BadArgument();
408 goto onError;
409 }
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411 if (encoding == NULL) {
412#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000413 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000414#else
415 PyErr_SetString(PyExc_ValueError, "no encoding specified");
416 goto onError;
417#endif
418 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000419
420 /* Encode via the codec registry */
421 v = PyCodec_Encode(str, encoding, errors);
422 if (v == NULL)
423 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000424
425 return v;
426
427 onError:
428 return NULL;
429}
430
431PyObject *PyString_AsEncodedString(PyObject *str,
432 const char *encoding,
433 const char *errors)
434{
435 PyObject *v;
436
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000437 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000438 if (v == NULL)
439 goto onError;
440
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000441#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 /* Convert Unicode to a string using the default encoding */
443 if (PyUnicode_Check(v)) {
444 PyObject *temp = v;
445 v = PyUnicode_AsEncodedString(v, NULL, NULL);
446 Py_DECREF(temp);
447 if (v == NULL)
448 goto onError;
449 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000450#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 if (!PyString_Check(v)) {
452 PyErr_Format(PyExc_TypeError,
453 "encoder did not return a string object (type=%.400s)",
454 v->ob_type->tp_name);
455 Py_DECREF(v);
456 goto onError;
457 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000458
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000460
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000461 onError:
462 return NULL;
463}
464
Guido van Rossum234f9421993-06-17 12:35:49 +0000465static void
Fred Drakeba096332000-07-09 07:04:36 +0000466string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000467{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000468 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000469}
470
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000471static int
472string_getsize(register PyObject *op)
473{
474 char *s;
475 int len;
476 if (PyString_AsStringAndSize(op, &s, &len))
477 return -1;
478 return len;
479}
480
481static /*const*/ char *
482string_getbuffer(register PyObject *op)
483{
484 char *s;
485 int len;
486 if (PyString_AsStringAndSize(op, &s, &len))
487 return NULL;
488 return s;
489}
490
Guido van Rossumd7047b31995-01-02 19:07:15 +0000491int
Fred Drakeba096332000-07-09 07:04:36 +0000492PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000494 if (!PyString_Check(op))
495 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000496 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000497}
498
499/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000500PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000501{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (!PyString_Check(op))
503 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000504 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000505}
506
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000507int
508PyString_AsStringAndSize(register PyObject *obj,
509 register char **s,
510 register int *len)
511{
512 if (s == NULL) {
513 PyErr_BadInternalCall();
514 return -1;
515 }
516
517 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000518#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (PyUnicode_Check(obj)) {
520 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
521 if (obj == NULL)
522 return -1;
523 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000524 else
525#endif
526 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000527 PyErr_Format(PyExc_TypeError,
528 "expected string or Unicode object, "
529 "%.200s found", obj->ob_type->tp_name);
530 return -1;
531 }
532 }
533
534 *s = PyString_AS_STRING(obj);
535 if (len != NULL)
536 *len = PyString_GET_SIZE(obj);
537 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
538 PyErr_SetString(PyExc_TypeError,
539 "expected string without null bytes");
540 return -1;
541 }
542 return 0;
543}
544
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000545/* Methods */
546
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000547static int
Fred Drakeba096332000-07-09 07:04:36 +0000548string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000549{
550 int i;
551 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000552 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000553 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000554 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000555 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000556 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000557 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000558
Thomas Wouters7e474022000-07-16 12:04:32 +0000559 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000560 quote = '\'';
561 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
562 quote = '"';
563
564 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000565 for (i = 0; i < op->ob_size; i++) {
566 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000567 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000569 else if (c == '\t')
570 fprintf(fp, "\\t");
571 else if (c == '\n')
572 fprintf(fp, "\\n");
573 else if (c == '\r')
574 fprintf(fp, "\\r");
575 else if (c < ' ' || c >= 0x7f)
576 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000577 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000578 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000580 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000581 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582}
583
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000584static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000585string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000586{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000587 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
588 PyObject *v;
589 if (newsize > INT_MAX) {
590 PyErr_SetString(PyExc_OverflowError,
591 "string is too large to make repr");
592 }
593 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000595 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000596 }
597 else {
598 register int i;
599 register char c;
600 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000601 int quote;
602
Thomas Wouters7e474022000-07-16 12:04:32 +0000603 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000604 quote = '\'';
605 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
606 quote = '"';
607
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000608 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610 for (i = 0; i < op->ob_size; i++) {
611 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000612 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000614 else if (c == '\t')
615 *p++ = '\\', *p++ = 't';
616 else if (c == '\n')
617 *p++ = '\\', *p++ = 'n';
618 else if (c == '\r')
619 *p++ = '\\', *p++ = 'r';
620 else if (c < ' ' || c >= 0x7f) {
621 sprintf(p, "\\x%02x", c & 0xff);
622 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 }
624 else
625 *p++ = c;
626 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000627 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000628 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000629 _PyString_Resize(
630 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000631 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000632 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000633}
634
Guido van Rossum189f1df2001-05-01 16:51:53 +0000635static PyObject *
636string_str(PyObject *s)
637{
638 Py_INCREF(s);
639 return s;
640}
641
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642static int
Fred Drakeba096332000-07-09 07:04:36 +0000643string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644{
645 return a->ob_size;
646}
647
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000648static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000649string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650{
651 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000652 register PyStringObject *op;
653 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000654#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000655 if (PyUnicode_Check(bb))
656 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000657#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000658 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000659 "cannot add type \"%.200s\" to string",
660 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661 return NULL;
662 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000663#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000664 /* Optimize cases with empty left or right operand */
665 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000666 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667 return bb;
668 }
669 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000670 Py_INCREF(a);
671 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672 }
673 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000674 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000675 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000676 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000677 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000678 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000679 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000680#ifdef CACHE_HASH
681 op->ob_shash = -1;
682#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000683#ifdef INTERN_STRINGS
684 op->ob_sinterned = NULL;
685#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000686 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
687 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
688 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000689 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000690#undef b
691}
692
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000694string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
696 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000697 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000699 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700 if (n < 0)
701 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000702 /* watch out for overflows: the size can overflow int,
703 * and the # of bytes needed can overflow size_t
704 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000705 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000706 if (n && size / n != a->ob_size) {
707 PyErr_SetString(PyExc_OverflowError,
708 "repeated string is too long");
709 return NULL;
710 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000711 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000712 Py_INCREF(a);
713 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000714 }
Tim Peters8f422462000-09-09 06:13:41 +0000715 nbytes = size * sizeof(char);
716 if (nbytes / sizeof(char) != (size_t)size ||
717 nbytes + sizeof(PyStringObject) <= nbytes) {
718 PyErr_SetString(PyExc_OverflowError,
719 "repeated string is too long");
720 return NULL;
721 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000723 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000724 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000726 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000727#ifdef CACHE_HASH
728 op->ob_shash = -1;
729#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000730#ifdef INTERN_STRINGS
731 op->ob_sinterned = NULL;
732#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000733 for (i = 0; i < size; i += a->ob_size)
734 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
735 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737}
738
739/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
740
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000741static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000742string_slice(register PyStringObject *a, register int i, register int j)
743 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000744{
745 if (i < 0)
746 i = 0;
747 if (j < 0)
748 j = 0; /* Avoid signed/unsigned bug in next line */
749 if (j > a->ob_size)
750 j = a->ob_size;
751 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 Py_INCREF(a);
753 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000754 }
755 if (j < i)
756 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000757 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000758}
759
Guido van Rossum9284a572000-03-07 15:53:43 +0000760static int
Fred Drakeba096332000-07-09 07:04:36 +0000761string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000762{
763 register char *s, *end;
764 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000765#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000766 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000767 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000768#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000769 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000770 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000771 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000772 return -1;
773 }
774 c = PyString_AsString(el)[0];
775 s = PyString_AsString(a);
776 end = s + PyString_Size(a);
777 while (s < end) {
778 if (c == *s++)
779 return 1;
780 }
781 return 0;
782}
783
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000784static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000785string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000787 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000788 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000790 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 return NULL;
792 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000793 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000794 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000795 if (v == NULL)
796 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000797 else {
798#ifdef COUNT_ALLOCS
799 one_strings++;
800#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000801 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000802 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000803 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804}
805
Martin v. Löwiscd353062001-05-24 16:56:35 +0000806static PyObject*
807string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000809 int c;
810 int len_a, len_b;
811 int min_len;
812 PyObject *result;
813
814 /* One of the objects is a string object. Make sure the
815 other one is one, too. */
816 if (a->ob_type != b->ob_type) {
817 result = Py_NotImplemented;
818 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000819 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000820 if (a == b) {
821 switch (op) {
822 case Py_EQ:case Py_LE:case Py_GE:
823 result = Py_True;
824 goto out;
825 case Py_NE:case Py_LT:case Py_GT:
826 result = Py_False;
827 goto out;
828 }
829 }
830 if (op == Py_EQ) {
831 /* Supporting Py_NE here as well does not save
832 much time, since Py_NE is rarely used. */
833 if (a->ob_size == b->ob_size
834 && (a->ob_sval[0] == b->ob_sval[0]
835 && memcmp(a->ob_sval, b->ob_sval,
836 a->ob_size) == 0)) {
837 result = Py_True;
838 } else {
839 result = Py_False;
840 }
841 goto out;
842 }
843 len_a = a->ob_size; len_b = b->ob_size;
844 min_len = (len_a < len_b) ? len_a : len_b;
845 if (min_len > 0) {
846 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
847 if (c==0)
848 c = memcmp(a->ob_sval, b->ob_sval, min_len);
849 }else
850 c = 0;
851 if (c == 0)
852 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
853 switch (op) {
854 case Py_LT: c = c < 0; break;
855 case Py_LE: c = c <= 0; break;
856 case Py_EQ: assert(0); break; /* unreachable */
857 case Py_NE: c = c != 0; break;
858 case Py_GT: c = c > 0; break;
859 case Py_GE: c = c >= 0; break;
860 default:
861 result = Py_NotImplemented;
862 goto out;
863 }
864 result = c ? Py_True : Py_False;
865 out:
866 Py_INCREF(result);
867 return result;
868}
869
870int
871_PyString_Eq(PyObject *o1, PyObject *o2)
872{
873 PyStringObject *a, *b;
874 a = (PyStringObject*)o1;
875 b = (PyStringObject*)o2;
876 return a->ob_size == b->ob_size
877 && *a->ob_sval == *b->ob_sval
878 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879}
880
Guido van Rossum9bfef441993-03-29 10:43:31 +0000881static long
Fred Drakeba096332000-07-09 07:04:36 +0000882string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000883{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000884 register int len;
885 register unsigned char *p;
886 register long x;
887
888#ifdef CACHE_HASH
889 if (a->ob_shash != -1)
890 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000891#ifdef INTERN_STRINGS
892 if (a->ob_sinterned != NULL)
893 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000895#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000896#endif
897 len = a->ob_size;
898 p = (unsigned char *) a->ob_sval;
899 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000900 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000901 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000902 x ^= a->ob_size;
903 if (x == -1)
904 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000905#ifdef CACHE_HASH
906 a->ob_shash = x;
907#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000908 return x;
909}
910
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000911static int
Fred Drakeba096332000-07-09 07:04:36 +0000912string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000913{
914 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000915 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000916 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000917 return -1;
918 }
919 *ptr = (void *)self->ob_sval;
920 return self->ob_size;
921}
922
923static int
Fred Drakeba096332000-07-09 07:04:36 +0000924string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000925{
Guido van Rossum045e6881997-09-08 18:30:11 +0000926 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000927 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000928 return -1;
929}
930
931static int
Fred Drakeba096332000-07-09 07:04:36 +0000932string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000933{
934 if ( lenp )
935 *lenp = self->ob_size;
936 return 1;
937}
938
Guido van Rossum1db70701998-10-08 02:18:52 +0000939static int
Fred Drakeba096332000-07-09 07:04:36 +0000940string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000941{
942 if ( index != 0 ) {
943 PyErr_SetString(PyExc_SystemError,
944 "accessing non-existent string segment");
945 return -1;
946 }
947 *ptr = self->ob_sval;
948 return self->ob_size;
949}
950
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000951static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000952 (inquiry)string_length, /*sq_length*/
953 (binaryfunc)string_concat, /*sq_concat*/
954 (intargfunc)string_repeat, /*sq_repeat*/
955 (intargfunc)string_item, /*sq_item*/
956 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000957 0, /*sq_ass_item*/
958 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000959 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000960};
961
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000962static PyBufferProcs string_as_buffer = {
963 (getreadbufferproc)string_buffer_getreadbuf,
964 (getwritebufferproc)string_buffer_getwritebuf,
965 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000966 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000967};
968
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000969
970
/* Mode selectors for do_strip(): which end(s) of the string to trim. */
#define LEFTSTRIP	0	/* leading whitespace only */
#define RIGHTSTRIP	1	/* trailing whitespace only */
#define BOTHSTRIP	2	/* both ends */
974
975
976static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000977split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000978{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000979 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000980 PyObject* item;
981 PyObject *list = PyList_New(0);
982
983 if (list == NULL)
984 return NULL;
985
Guido van Rossum4c08d552000-03-10 22:55:18 +0000986 for (i = j = 0; i < len; ) {
987 while (i < len && isspace(Py_CHARMASK(s[i])))
988 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000989 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000990 while (i < len && !isspace(Py_CHARMASK(s[i])))
991 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000992 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000993 if (maxsplit-- <= 0)
994 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000995 item = PyString_FromStringAndSize(s+j, (int)(i-j));
996 if (item == NULL)
997 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000998 err = PyList_Append(list, item);
999 Py_DECREF(item);
1000 if (err < 0)
1001 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001002 while (i < len && isspace(Py_CHARMASK(s[i])))
1003 i++;
1004 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001005 }
1006 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001007 if (j < len) {
1008 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1009 if (item == NULL)
1010 goto finally;
1011 err = PyList_Append(list, item);
1012 Py_DECREF(item);
1013 if (err < 0)
1014 goto finally;
1015 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016 return list;
1017 finally:
1018 Py_DECREF(list);
1019 return NULL;
1020}
1021
1022
1023static char split__doc__[] =
1024"S.split([sep [,maxsplit]]) -> list of strings\n\
1025\n\
1026Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001027delimiter string. If maxsplit is given, at most maxsplit\n\
1028splits are done. If sep is not specified, any whitespace string\n\
1029is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001030
1031static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001032string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033{
1034 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001035 int maxsplit = -1;
1036 const char *s = PyString_AS_STRING(self), *sub;
1037 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001038
Guido van Rossum4c08d552000-03-10 22:55:18 +00001039 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001040 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001041 if (maxsplit < 0)
1042 maxsplit = INT_MAX;
1043 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001044 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001045 if (PyString_Check(subobj)) {
1046 sub = PyString_AS_STRING(subobj);
1047 n = PyString_GET_SIZE(subobj);
1048 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001049#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001050 else if (PyUnicode_Check(subobj))
1051 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001052#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1054 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055 if (n == 0) {
1056 PyErr_SetString(PyExc_ValueError, "empty separator");
1057 return NULL;
1058 }
1059
1060 list = PyList_New(0);
1061 if (list == NULL)
1062 return NULL;
1063
1064 i = j = 0;
1065 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001066 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001067 if (maxsplit-- <= 0)
1068 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001069 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1070 if (item == NULL)
1071 goto fail;
1072 err = PyList_Append(list, item);
1073 Py_DECREF(item);
1074 if (err < 0)
1075 goto fail;
1076 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077 }
1078 else
1079 i++;
1080 }
1081 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1082 if (item == NULL)
1083 goto fail;
1084 err = PyList_Append(list, item);
1085 Py_DECREF(item);
1086 if (err < 0)
1087 goto fail;
1088
1089 return list;
1090
1091 fail:
1092 Py_DECREF(list);
1093 return NULL;
1094}
1095
1096
1097static char join__doc__[] =
1098"S.join(sequence) -> string\n\
1099\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001100Return a string which is the concatenation of the strings in the\n\
1101sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001102
1103static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001104string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001105{
1106 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001107 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001108 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001109 char *p;
1110 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001111 size_t sz = 0;
1112 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001113 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001114
Tim Peters19fe14e2001-01-19 03:03:47 +00001115 seq = PySequence_Fast(orig, "");
1116 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001117 if (PyErr_ExceptionMatches(PyExc_TypeError))
1118 PyErr_Format(PyExc_TypeError,
1119 "sequence expected, %.80s found",
1120 orig->ob_type->tp_name);
1121 return NULL;
1122 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001123
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001124 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001125 if (seqlen == 0) {
1126 Py_DECREF(seq);
1127 return PyString_FromString("");
1128 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001129 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001130 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001131 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1132 PyErr_Format(PyExc_TypeError,
1133 "sequence item 0: expected string,"
1134 " %.80s found",
1135 item->ob_type->tp_name);
1136 Py_DECREF(seq);
1137 return NULL;
1138 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001139 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001140 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001141 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001143
Tim Peters19fe14e2001-01-19 03:03:47 +00001144 /* There are at least two things to join. Do a pre-pass to figure out
1145 * the total amount of space we'll need (sz), see whether any argument
1146 * is absurd, and defer to the Unicode join if appropriate.
1147 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001148 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001149 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001150 item = PySequence_Fast_GET_ITEM(seq, i);
1151 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001152#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001153 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001154 /* Defer to Unicode join.
1155 * CAUTION: There's no gurantee that the
1156 * original sequence can be iterated over
1157 * again, so we must pass seq here.
1158 */
1159 PyObject *result;
1160 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001161 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001162 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001163 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001164#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001165 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001166 "sequence item %i: expected string,"
1167 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001168 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001169 Py_DECREF(seq);
1170 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001171 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001172 sz += PyString_GET_SIZE(item);
1173 if (i != 0)
1174 sz += seplen;
1175 if (sz < old_sz || sz > INT_MAX) {
1176 PyErr_SetString(PyExc_OverflowError,
1177 "join() is too long for a Python string");
1178 Py_DECREF(seq);
1179 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001181 }
1182
1183 /* Allocate result space. */
1184 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1185 if (res == NULL) {
1186 Py_DECREF(seq);
1187 return NULL;
1188 }
1189
1190 /* Catenate everything. */
1191 p = PyString_AS_STRING(res);
1192 for (i = 0; i < seqlen; ++i) {
1193 size_t n;
1194 item = PySequence_Fast_GET_ITEM(seq, i);
1195 n = PyString_GET_SIZE(item);
1196 memcpy(p, PyString_AS_STRING(item), n);
1197 p += n;
1198 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001199 memcpy(p, sep, seplen);
1200 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001201 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001203
Jeremy Hylton49048292000-07-11 03:28:17 +00001204 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001206}
1207
Tim Peters52e155e2001-06-16 05:42:57 +00001208PyObject *
1209_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001210{
Tim Petersa7259592001-06-16 05:11:17 +00001211 assert(sep != NULL && PyString_Check(sep));
1212 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001213 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001214}
1215
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216static long
Fred Drakeba096332000-07-09 07:04:36 +00001217string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001218{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001219 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001220 int len = PyString_GET_SIZE(self);
1221 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001222 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001223
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001224 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001225 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001226 return -2;
1227 if (PyString_Check(subobj)) {
1228 sub = PyString_AS_STRING(subobj);
1229 n = PyString_GET_SIZE(subobj);
1230 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001231#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001232 else if (PyUnicode_Check(subobj))
1233 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001234#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001235 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001236 return -2;
1237
1238 if (last > len)
1239 last = len;
1240 if (last < 0)
1241 last += len;
1242 if (last < 0)
1243 last = 0;
1244 if (i < 0)
1245 i += len;
1246 if (i < 0)
1247 i = 0;
1248
Guido van Rossum4c08d552000-03-10 22:55:18 +00001249 if (dir > 0) {
1250 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001251 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001252 last -= n;
1253 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001254 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001255 return (long)i;
1256 }
1257 else {
1258 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001259
Guido van Rossum4c08d552000-03-10 22:55:18 +00001260 if (n == 0 && i <= last)
1261 return (long)last;
1262 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001263 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001264 return (long)j;
1265 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001266
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001267 return -1;
1268}
1269
1270
/* Docstring of S.find().  The range is written S[start:end], i.e. in
   slice notation and with the same capital S as the rest of the text. */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n"
"\n"
"Return the lowest index in S where substring sub is found,\n"
"such that sub is contained within S[start:end]. Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
"Return -1 on failure.";
1279
1280static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001281string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001282{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001283 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284 if (result == -2)
1285 return NULL;
1286 return PyInt_FromLong(result);
1287}
1288
1289
1290static char index__doc__[] =
1291"S.index(sub [,start [,end]]) -> int\n\
1292\n\
1293Like S.find() but raise ValueError when the substring is not found.";
1294
1295static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001296string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 if (result == -2)
1300 return NULL;
1301 if (result == -1) {
1302 PyErr_SetString(PyExc_ValueError,
1303 "substring not found in string.index");
1304 return NULL;
1305 }
1306 return PyInt_FromLong(result);
1307}
1308
1309
/* Docstring of S.rfind().  The range is written S[start:end], i.e. in
   slice notation and with the same capital S as the rest of the text. */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n"
"\n"
"Return the highest index in S where substring sub is found,\n"
"such that sub is contained within S[start:end]. Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
"Return -1 on failure.";
1318
1319static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001320string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323 if (result == -2)
1324 return NULL;
1325 return PyInt_FromLong(result);
1326}
1327
1328
1329static char rindex__doc__[] =
1330"S.rindex(sub [,start [,end]]) -> int\n\
1331\n\
1332Like S.rfind() but raise ValueError when the substring is not found.";
1333
1334static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001335string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001337 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338 if (result == -2)
1339 return NULL;
1340 if (result == -1) {
1341 PyErr_SetString(PyExc_ValueError,
1342 "substring not found in string.rindex");
1343 return NULL;
1344 }
1345 return PyInt_FromLong(result);
1346}
1347
1348
1349static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001350do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351{
1352 char *s = PyString_AS_STRING(self);
1353 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001355 i = 0;
1356 if (striptype != RIGHTSTRIP) {
1357 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1358 i++;
1359 }
1360 }
1361
1362 j = len;
1363 if (striptype != LEFTSTRIP) {
1364 do {
1365 j--;
1366 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1367 j++;
1368 }
1369
1370 if (i == 0 && j == len) {
1371 Py_INCREF(self);
1372 return (PyObject*)self;
1373 }
1374 else
1375 return PyString_FromStringAndSize(s+i, j-i);
1376}
1377
1378
1379static char strip__doc__[] =
1380"S.strip() -> string\n\
1381\n\
1382Return a copy of the string S with leading and trailing\n\
1383whitespace removed.";
1384
1385static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001386string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001388 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389}
1390
1391
1392static char lstrip__doc__[] =
1393"S.lstrip() -> string\n\
1394\n\
1395Return a copy of the string S with leading whitespace removed.";
1396
1397static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001398string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001400 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401}
1402
1403
1404static char rstrip__doc__[] =
1405"S.rstrip() -> string\n\
1406\n\
1407Return a copy of the string S with trailing whitespace removed.";
1408
1409static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001410string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001412 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413}
1414
1415
1416static char lower__doc__[] =
1417"S.lower() -> string\n\
1418\n\
1419Return a copy of the string S converted to lowercase.";
1420
1421static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001422string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423{
1424 char *s = PyString_AS_STRING(self), *s_new;
1425 int i, n = PyString_GET_SIZE(self);
1426 PyObject *new;
1427
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428 new = PyString_FromStringAndSize(NULL, n);
1429 if (new == NULL)
1430 return NULL;
1431 s_new = PyString_AsString(new);
1432 for (i = 0; i < n; i++) {
1433 int c = Py_CHARMASK(*s++);
1434 if (isupper(c)) {
1435 *s_new = tolower(c);
1436 } else
1437 *s_new = c;
1438 s_new++;
1439 }
1440 return new;
1441}
1442
1443
1444static char upper__doc__[] =
1445"S.upper() -> string\n\
1446\n\
1447Return a copy of the string S converted to uppercase.";
1448
1449static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001450string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001451{
1452 char *s = PyString_AS_STRING(self), *s_new;
1453 int i, n = PyString_GET_SIZE(self);
1454 PyObject *new;
1455
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 new = PyString_FromStringAndSize(NULL, n);
1457 if (new == NULL)
1458 return NULL;
1459 s_new = PyString_AsString(new);
1460 for (i = 0; i < n; i++) {
1461 int c = Py_CHARMASK(*s++);
1462 if (islower(c)) {
1463 *s_new = toupper(c);
1464 } else
1465 *s_new = c;
1466 s_new++;
1467 }
1468 return new;
1469}
1470
1471
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472static char title__doc__[] =
1473"S.title() -> string\n\
1474\n\
1475Return a titlecased version of S, i.e. words start with uppercase\n\
1476characters, all remaining cased characters have lowercase.";
1477
1478static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001479string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001480{
1481 char *s = PyString_AS_STRING(self), *s_new;
1482 int i, n = PyString_GET_SIZE(self);
1483 int previous_is_cased = 0;
1484 PyObject *new;
1485
Guido van Rossum4c08d552000-03-10 22:55:18 +00001486 new = PyString_FromStringAndSize(NULL, n);
1487 if (new == NULL)
1488 return NULL;
1489 s_new = PyString_AsString(new);
1490 for (i = 0; i < n; i++) {
1491 int c = Py_CHARMASK(*s++);
1492 if (islower(c)) {
1493 if (!previous_is_cased)
1494 c = toupper(c);
1495 previous_is_cased = 1;
1496 } else if (isupper(c)) {
1497 if (previous_is_cased)
1498 c = tolower(c);
1499 previous_is_cased = 1;
1500 } else
1501 previous_is_cased = 0;
1502 *s_new++ = c;
1503 }
1504 return new;
1505}
1506
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507static char capitalize__doc__[] =
1508"S.capitalize() -> string\n\
1509\n\
1510Return a copy of the string S with only its first character\n\
1511capitalized.";
1512
1513static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001514string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515{
1516 char *s = PyString_AS_STRING(self), *s_new;
1517 int i, n = PyString_GET_SIZE(self);
1518 PyObject *new;
1519
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520 new = PyString_FromStringAndSize(NULL, n);
1521 if (new == NULL)
1522 return NULL;
1523 s_new = PyString_AsString(new);
1524 if (0 < n) {
1525 int c = Py_CHARMASK(*s++);
1526 if (islower(c))
1527 *s_new = toupper(c);
1528 else
1529 *s_new = c;
1530 s_new++;
1531 }
1532 for (i = 1; i < n; i++) {
1533 int c = Py_CHARMASK(*s++);
1534 if (isupper(c))
1535 *s_new = tolower(c);
1536 else
1537 *s_new = c;
1538 s_new++;
1539 }
1540 return new;
1541}
1542
1543
1544static char count__doc__[] =
1545"S.count(sub[, start[, end]]) -> int\n\
1546\n\
1547Return the number of occurrences of substring sub in string\n\
1548S[start:end]. Optional arguments start and end are\n\
1549interpreted as in slice notation.";
1550
1551static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001552string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555 int len = PyString_GET_SIZE(self), n;
1556 int i = 0, last = INT_MAX;
1557 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001558 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001559
Guido van Rossumc6821402000-05-08 14:08:05 +00001560 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1561 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001562 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001563
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 if (PyString_Check(subobj)) {
1565 sub = PyString_AS_STRING(subobj);
1566 n = PyString_GET_SIZE(subobj);
1567 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001568#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001569 else if (PyUnicode_Check(subobj)) {
1570 int count;
1571 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1572 if (count == -1)
1573 return NULL;
1574 else
1575 return PyInt_FromLong((long) count);
1576 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001577#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001578 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1579 return NULL;
1580
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001581 if (last > len)
1582 last = len;
1583 if (last < 0)
1584 last += len;
1585 if (last < 0)
1586 last = 0;
1587 if (i < 0)
1588 i += len;
1589 if (i < 0)
1590 i = 0;
1591 m = last + 1 - n;
1592 if (n == 0)
1593 return PyInt_FromLong((long) (m-i));
1594
1595 r = 0;
1596 while (i < m) {
1597 if (!memcmp(s+i, sub, n)) {
1598 r++;
1599 i += n;
1600 } else {
1601 i++;
1602 }
1603 }
1604 return PyInt_FromLong((long) r);
1605}
1606
1607
1608static char swapcase__doc__[] =
1609"S.swapcase() -> string\n\
1610\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001611Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612converted to lowercase and vice versa.";
1613
1614static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001615string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001616{
1617 char *s = PyString_AS_STRING(self), *s_new;
1618 int i, n = PyString_GET_SIZE(self);
1619 PyObject *new;
1620
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621 new = PyString_FromStringAndSize(NULL, n);
1622 if (new == NULL)
1623 return NULL;
1624 s_new = PyString_AsString(new);
1625 for (i = 0; i < n; i++) {
1626 int c = Py_CHARMASK(*s++);
1627 if (islower(c)) {
1628 *s_new = toupper(c);
1629 }
1630 else if (isupper(c)) {
1631 *s_new = tolower(c);
1632 }
1633 else
1634 *s_new = c;
1635 s_new++;
1636 }
1637 return new;
1638}
1639
1640
/* Docstring of S.translate(). */
static char translate__doc__[] =
"S.translate(table [,deletechars]) -> string\n"
"\n"
"Return a copy of the string S, where all characters occurring\n"
"in the optional argument deletechars are removed, and the\n"
"remaining characters have been mapped through the given\n"
"translation table, which must be a string of length 256.";
1648
1649static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001650string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001652 register char *input, *output;
1653 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 register int i, c, changed = 0;
1655 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001656 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001657 int inlen, tablen, dellen = 0;
1658 PyObject *result;
1659 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001660 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661
Guido van Rossum4c08d552000-03-10 22:55:18 +00001662 if (!PyArg_ParseTuple(args, "O|O:translate",
1663 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001664 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665
1666 if (PyString_Check(tableobj)) {
1667 table1 = PyString_AS_STRING(tableobj);
1668 tablen = PyString_GET_SIZE(tableobj);
1669 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001670#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001672 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673 parameter; instead a mapping to None will cause characters
1674 to be deleted. */
1675 if (delobj != NULL) {
1676 PyErr_SetString(PyExc_TypeError,
1677 "deletions are implemented differently for unicode");
1678 return NULL;
1679 }
1680 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1681 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001682#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001685
1686 if (delobj != NULL) {
1687 if (PyString_Check(delobj)) {
1688 del_table = PyString_AS_STRING(delobj);
1689 dellen = PyString_GET_SIZE(delobj);
1690 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001691#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001692 else if (PyUnicode_Check(delobj)) {
1693 PyErr_SetString(PyExc_TypeError,
1694 "deletions are implemented differently for unicode");
1695 return NULL;
1696 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001697#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1699 return NULL;
1700
1701 if (tablen != 256) {
1702 PyErr_SetString(PyExc_ValueError,
1703 "translation table must be 256 characters long");
1704 return NULL;
1705 }
1706 }
1707 else {
1708 del_table = NULL;
1709 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710 }
1711
1712 table = table1;
1713 inlen = PyString_Size(input_obj);
1714 result = PyString_FromStringAndSize((char *)NULL, inlen);
1715 if (result == NULL)
1716 return NULL;
1717 output_start = output = PyString_AsString(result);
1718 input = PyString_AsString(input_obj);
1719
1720 if (dellen == 0) {
1721 /* If no deletions are required, use faster code */
1722 for (i = inlen; --i >= 0; ) {
1723 c = Py_CHARMASK(*input++);
1724 if (Py_CHARMASK((*output++ = table[c])) != c)
1725 changed = 1;
1726 }
1727 if (changed)
1728 return result;
1729 Py_DECREF(result);
1730 Py_INCREF(input_obj);
1731 return input_obj;
1732 }
1733
1734 for (i = 0; i < 256; i++)
1735 trans_table[i] = Py_CHARMASK(table[i]);
1736
1737 for (i = 0; i < dellen; i++)
1738 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1739
1740 for (i = inlen; --i >= 0; ) {
1741 c = Py_CHARMASK(*input++);
1742 if (trans_table[c] != -1)
1743 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1744 continue;
1745 changed = 1;
1746 }
1747 if (!changed) {
1748 Py_DECREF(result);
1749 Py_INCREF(input_obj);
1750 return input_obj;
1751 }
1752 /* Fix the size of the resulting string */
1753 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1754 return NULL;
1755 return result;
1756}
1757
1758
1759/* What follows is used for implementing replace(). Perry Stoll. */
1760
/*
   mymemfind

   A strstr-like search that works on arbitrary blocks of memory
   (embedded NUL bytes are treated like any other byte).

   Looks for the first place in the LEN bytes at MEM where the PAT_LEN
   bytes at PAT occur.  Returns the index of that place within MEM, or
   -1 when there is no match.  A pattern longer than MEM never matches,
   so -1 is returned in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start inside the final pat_len-1 bytes, so this is
       the last index worth trying. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && !memcmp(mem + pos, pat, pat_len))
            return pos;
        pos++;
    }
    return -1;
}
1786
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning left to
   right and resuming each search just past the previous match.
   For example mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
   also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    while (len >= 0) {
        int consumed;
        int at = mymemfind(mem, len, pat, pat_len);

        if (at == -1)
            break;
        /* Skip everything up to and including this match. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1810
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "replace all of them".

   If STR is empty, the length of PAT is greater than the length of STR,
   or no replacement would take place, then the original string is
   returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller is responsible for freeing it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or the result
       would be too long to be described by an int).
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, growth;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by `growth` bytes.  When that
       is positive, make sure len + nfound*growth still fits in an int:
       an overflowed size would under-allocate the buffer that the
       memcpy() calls below fill in.  (A negative growth cannot
       overflow, since the matches all lie inside the input.) */
    growth = sub_len - pat_len;
    if (growth > 0 && nfound > (INT_MAX - len) / growth)
        return NULL;
    new_len = len + nfound * growth;

    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1895
1896
1897static char replace__doc__[] =
1898"S.replace (old, new[, maxsplit]) -> string\n\
1899\n\
1900Return a copy of string S with all occurrences of substring\n\
1901old replaced by new. If the optional argument maxsplit is\n\
1902given, only the first maxsplit occurrences are replaced.";
1903
1904static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001905string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001907 const char *str = PyString_AS_STRING(self), *sub, *repl;
1908 char *new_s;
1909 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1910 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001912 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913
Guido van Rossum4c08d552000-03-10 22:55:18 +00001914 if (!PyArg_ParseTuple(args, "OO|i:replace",
1915 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001917
1918 if (PyString_Check(subobj)) {
1919 sub = PyString_AS_STRING(subobj);
1920 sub_len = PyString_GET_SIZE(subobj);
1921 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001922#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001924 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001926#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001927 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1928 return NULL;
1929
1930 if (PyString_Check(replobj)) {
1931 repl = PyString_AS_STRING(replobj);
1932 repl_len = PyString_GET_SIZE(replobj);
1933 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001934#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001935 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001936 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001937 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001938#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001939 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1940 return NULL;
1941
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001942 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001943 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 return NULL;
1945 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001946 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947 if (new_s == NULL) {
1948 PyErr_NoMemory();
1949 return NULL;
1950 }
1951 if (out_len == -1) {
1952 /* we're returning another reference to self */
1953 new = (PyObject*)self;
1954 Py_INCREF(new);
1955 }
1956 else {
1957 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001958 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 }
1960 return new;
1961}
1962
1963
1964static char startswith__doc__[] =
1965"S.startswith(prefix[, start[, end]]) -> int\n\
1966\n\
1967Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1968optional start, test S beginning at that position. With optional end, stop\n\
1969comparing S at that position.";
1970
1971static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001972string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001974 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001976 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977 int plen;
1978 int start = 0;
1979 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001980 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981
Guido van Rossumc6821402000-05-08 14:08:05 +00001982 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1983 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001984 return NULL;
1985 if (PyString_Check(subobj)) {
1986 prefix = PyString_AS_STRING(subobj);
1987 plen = PyString_GET_SIZE(subobj);
1988 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001989#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001990 else if (PyUnicode_Check(subobj)) {
1991 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001992 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001993 subobj, start, end, -1);
1994 if (rc == -1)
1995 return NULL;
1996 else
1997 return PyInt_FromLong((long) rc);
1998 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001999#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002000 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001 return NULL;
2002
2003 /* adopt Java semantics for index out of range. it is legal for
2004 * offset to be == plen, but this only returns true if prefix is
2005 * the empty string.
2006 */
2007 if (start < 0 || start+plen > len)
2008 return PyInt_FromLong(0);
2009
2010 if (!memcmp(str+start, prefix, plen)) {
2011 /* did the match end after the specified end? */
2012 if (end < 0)
2013 return PyInt_FromLong(1);
2014 else if (end - start < plen)
2015 return PyInt_FromLong(0);
2016 else
2017 return PyInt_FromLong(1);
2018 }
2019 else return PyInt_FromLong(0);
2020}
2021
2022
2023static char endswith__doc__[] =
2024"S.endswith(suffix[, start[, end]]) -> int\n\
2025\n\
2026Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2027optional start, test S beginning at that position. With optional end, stop\n\
2028comparing S at that position.";
2029
2030static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002031string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002033 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002035 const char* suffix;
2036 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 int start = 0;
2038 int end = -1;
2039 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002040 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041
Guido van Rossumc6821402000-05-08 14:08:05 +00002042 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2043 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 return NULL;
2045 if (PyString_Check(subobj)) {
2046 suffix = PyString_AS_STRING(subobj);
2047 slen = PyString_GET_SIZE(subobj);
2048 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002049#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002050 else if (PyUnicode_Check(subobj)) {
2051 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002052 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002053 subobj, start, end, +1);
2054 if (rc == -1)
2055 return NULL;
2056 else
2057 return PyInt_FromLong((long) rc);
2058 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002059#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061 return NULL;
2062
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return PyInt_FromLong(0);
2065
2066 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002068
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070 return PyInt_FromLong(1);
2071 else return PyInt_FromLong(0);
2072}
2073
2074
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002075static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002076"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002077\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002078Encodes S using the codec registered for encoding. encoding defaults\n\
2079to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002080handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2081a ValueError. Other possible values are 'ignore' and 'replace'.";
2082
2083static PyObject *
2084string_encode(PyStringObject *self, PyObject *args)
2085{
2086 char *encoding = NULL;
2087 char *errors = NULL;
2088 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2089 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002090 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2091}
2092
2093
2094static char decode__doc__[] =
2095"S.decode([encoding[,errors]]) -> object\n\
2096\n\
2097Decodes S using the codec registered for encoding. encoding defaults\n\
2098to the default encoding. errors may be given to set a different error\n\
2099handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2100a ValueError. Other possible values are 'ignore' and 'replace'.";
2101
2102static PyObject *
2103string_decode(PyStringObject *self, PyObject *args)
2104{
2105 char *encoding = NULL;
2106 char *errors = NULL;
2107 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2108 return NULL;
2109 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002110}
2111
2112
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113static char expandtabs__doc__[] =
2114"S.expandtabs([tabsize]) -> string\n\
2115\n\
2116Return a copy of S where all tab characters are expanded using spaces.\n\
2117If tabsize is not given, a tab size of 8 characters is assumed.";
2118
2119static PyObject*
2120string_expandtabs(PyStringObject *self, PyObject *args)
2121{
2122 const char *e, *p;
2123 char *q;
2124 int i, j;
2125 PyObject *u;
2126 int tabsize = 8;
2127
2128 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2129 return NULL;
2130
Thomas Wouters7e474022000-07-16 12:04:32 +00002131 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132 i = j = 0;
2133 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2134 for (p = PyString_AS_STRING(self); p < e; p++)
2135 if (*p == '\t') {
2136 if (tabsize > 0)
2137 j += tabsize - (j % tabsize);
2138 }
2139 else {
2140 j++;
2141 if (*p == '\n' || *p == '\r') {
2142 i += j;
2143 j = 0;
2144 }
2145 }
2146
2147 /* Second pass: create output string and fill it */
2148 u = PyString_FromStringAndSize(NULL, i + j);
2149 if (!u)
2150 return NULL;
2151
2152 j = 0;
2153 q = PyString_AS_STRING(u);
2154
2155 for (p = PyString_AS_STRING(self); p < e; p++)
2156 if (*p == '\t') {
2157 if (tabsize > 0) {
2158 i = tabsize - (j % tabsize);
2159 j += i;
2160 while (i--)
2161 *q++ = ' ';
2162 }
2163 }
2164 else {
2165 j++;
2166 *q++ = *p;
2167 if (*p == '\n' || *p == '\r')
2168 j = 0;
2169 }
2170
2171 return u;
2172}
2173
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002174static
2175PyObject *pad(PyStringObject *self,
2176 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002177 int right,
2178 char fill)
2179{
2180 PyObject *u;
2181
2182 if (left < 0)
2183 left = 0;
2184 if (right < 0)
2185 right = 0;
2186
2187 if (left == 0 && right == 0) {
2188 Py_INCREF(self);
2189 return (PyObject *)self;
2190 }
2191
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002192 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 left + PyString_GET_SIZE(self) + right);
2194 if (u) {
2195 if (left)
2196 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002197 memcpy(PyString_AS_STRING(u) + left,
2198 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199 PyString_GET_SIZE(self));
2200 if (right)
2201 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2202 fill, right);
2203 }
2204
2205 return u;
2206}
2207
2208static char ljust__doc__[] =
2209"S.ljust(width) -> string\n\
2210\n\
2211Return S left justified in a string of length width. Padding is\n\
2212done using spaces.";
2213
2214static PyObject *
2215string_ljust(PyStringObject *self, PyObject *args)
2216{
2217 int width;
2218 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2219 return NULL;
2220
2221 if (PyString_GET_SIZE(self) >= width) {
2222 Py_INCREF(self);
2223 return (PyObject*) self;
2224 }
2225
2226 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2227}
2228
2229
2230static char rjust__doc__[] =
2231"S.rjust(width) -> string\n\
2232\n\
2233Return S right justified in a string of length width. Padding is\n\
2234done using spaces.";
2235
2236static PyObject *
2237string_rjust(PyStringObject *self, PyObject *args)
2238{
2239 int width;
2240 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2241 return NULL;
2242
2243 if (PyString_GET_SIZE(self) >= width) {
2244 Py_INCREF(self);
2245 return (PyObject*) self;
2246 }
2247
2248 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2249}
2250
2251
2252static char center__doc__[] =
2253"S.center(width) -> string\n\
2254\n\
2255Return S centered in a string of length width. Padding is done\n\
2256using spaces.";
2257
2258static PyObject *
2259string_center(PyStringObject *self, PyObject *args)
2260{
2261 int marg, left;
2262 int width;
2263
2264 if (!PyArg_ParseTuple(args, "i:center", &width))
2265 return NULL;
2266
2267 if (PyString_GET_SIZE(self) >= width) {
2268 Py_INCREF(self);
2269 return (PyObject*) self;
2270 }
2271
2272 marg = width - PyString_GET_SIZE(self);
2273 left = marg / 2 + (marg & width & 1);
2274
2275 return pad(self, left, marg - left, ' ');
2276}
2277
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implements S.zfill(width) (currently compiled out by the #if 0).

   Left-pads S with '0' up to `width`; if S begins with a '+' or '-'
   sign, the sign is moved in front of the padding.  A string that is
   already at least `width` long is returned as a new reference to
   self. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int npad;
    PyObject *padded;
    char *buf;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    npad = width - PyString_GET_SIZE(self);

    padded = pad(self, npad, 0, '0');
    if (padded == NULL)
        return NULL;

    /* buf[npad] is the first character of the original string. */
    buf = PyString_AS_STRING(padded);
    if (buf[npad] == '+' || buf[npad] == '-') {
        /* move sign to beginning of string */
        buf[0] = buf[npad];
        buf[npad] = '0';
    }

    return padded;
}
#endif
2317
2318static char isspace__doc__[] =
2319"S.isspace() -> int\n\
2320\n\
2321Return 1 if there are only whitespace characters in S,\n\
23220 otherwise.";
2323
2324static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002325string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326{
Fred Drakeba096332000-07-09 07:04:36 +00002327 register const unsigned char *p
2328 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002329 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002330
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331 /* Shortcut for single character strings */
2332 if (PyString_GET_SIZE(self) == 1 &&
2333 isspace(*p))
2334 return PyInt_FromLong(1);
2335
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002336 /* Special case for empty strings */
2337 if (PyString_GET_SIZE(self) == 0)
2338 return PyInt_FromLong(0);
2339
Guido van Rossum4c08d552000-03-10 22:55:18 +00002340 e = p + PyString_GET_SIZE(self);
2341 for (; p < e; p++) {
2342 if (!isspace(*p))
2343 return PyInt_FromLong(0);
2344 }
2345 return PyInt_FromLong(1);
2346}
2347
2348
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002349static char isalpha__doc__[] =
2350"S.isalpha() -> int\n\
2351\n\
2352Return 1 if all characters in S are alphabetic\n\
2353and there is at least one character in S, 0 otherwise.";
2354
2355static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002356string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002357{
Fred Drakeba096332000-07-09 07:04:36 +00002358 register const unsigned char *p
2359 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002360 register const unsigned char *e;
2361
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002362 /* Shortcut for single character strings */
2363 if (PyString_GET_SIZE(self) == 1 &&
2364 isalpha(*p))
2365 return PyInt_FromLong(1);
2366
2367 /* Special case for empty strings */
2368 if (PyString_GET_SIZE(self) == 0)
2369 return PyInt_FromLong(0);
2370
2371 e = p + PyString_GET_SIZE(self);
2372 for (; p < e; p++) {
2373 if (!isalpha(*p))
2374 return PyInt_FromLong(0);
2375 }
2376 return PyInt_FromLong(1);
2377}
2378
2379
2380static char isalnum__doc__[] =
2381"S.isalnum() -> int\n\
2382\n\
2383Return 1 if all characters in S are alphanumeric\n\
2384and there is at least one character in S, 0 otherwise.";
2385
2386static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002387string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002388{
Fred Drakeba096332000-07-09 07:04:36 +00002389 register const unsigned char *p
2390 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002391 register const unsigned char *e;
2392
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002393 /* Shortcut for single character strings */
2394 if (PyString_GET_SIZE(self) == 1 &&
2395 isalnum(*p))
2396 return PyInt_FromLong(1);
2397
2398 /* Special case for empty strings */
2399 if (PyString_GET_SIZE(self) == 0)
2400 return PyInt_FromLong(0);
2401
2402 e = p + PyString_GET_SIZE(self);
2403 for (; p < e; p++) {
2404 if (!isalnum(*p))
2405 return PyInt_FromLong(0);
2406 }
2407 return PyInt_FromLong(1);
2408}
2409
2410
Guido van Rossum4c08d552000-03-10 22:55:18 +00002411static char isdigit__doc__[] =
2412"S.isdigit() -> int\n\
2413\n\
2414Return 1 if there are only digit characters in S,\n\
24150 otherwise.";
2416
2417static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002418string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002419{
Fred Drakeba096332000-07-09 07:04:36 +00002420 register const unsigned char *p
2421 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002422 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424 /* Shortcut for single character strings */
2425 if (PyString_GET_SIZE(self) == 1 &&
2426 isdigit(*p))
2427 return PyInt_FromLong(1);
2428
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002429 /* Special case for empty strings */
2430 if (PyString_GET_SIZE(self) == 0)
2431 return PyInt_FromLong(0);
2432
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 e = p + PyString_GET_SIZE(self);
2434 for (; p < e; p++) {
2435 if (!isdigit(*p))
2436 return PyInt_FromLong(0);
2437 }
2438 return PyInt_FromLong(1);
2439}
2440
2441
2442static char islower__doc__[] =
2443"S.islower() -> int\n\
2444\n\
2445Return 1 if all cased characters in S are lowercase and there is\n\
2446at least one cased character in S, 0 otherwise.";
2447
2448static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002449string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002450{
Fred Drakeba096332000-07-09 07:04:36 +00002451 register const unsigned char *p
2452 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002453 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454 int cased;
2455
Guido van Rossum4c08d552000-03-10 22:55:18 +00002456 /* Shortcut for single character strings */
2457 if (PyString_GET_SIZE(self) == 1)
2458 return PyInt_FromLong(islower(*p) != 0);
2459
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002460 /* Special case for empty strings */
2461 if (PyString_GET_SIZE(self) == 0)
2462 return PyInt_FromLong(0);
2463
Guido van Rossum4c08d552000-03-10 22:55:18 +00002464 e = p + PyString_GET_SIZE(self);
2465 cased = 0;
2466 for (; p < e; p++) {
2467 if (isupper(*p))
2468 return PyInt_FromLong(0);
2469 else if (!cased && islower(*p))
2470 cased = 1;
2471 }
2472 return PyInt_FromLong(cased);
2473}
2474
2475
2476static char isupper__doc__[] =
2477"S.isupper() -> int\n\
2478\n\
2479Return 1 if all cased characters in S are uppercase and there is\n\
2480at least one cased character in S, 0 otherwise.";
2481
2482static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002483string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484{
Fred Drakeba096332000-07-09 07:04:36 +00002485 register const unsigned char *p
2486 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002487 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002488 int cased;
2489
Guido van Rossum4c08d552000-03-10 22:55:18 +00002490 /* Shortcut for single character strings */
2491 if (PyString_GET_SIZE(self) == 1)
2492 return PyInt_FromLong(isupper(*p) != 0);
2493
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002494 /* Special case for empty strings */
2495 if (PyString_GET_SIZE(self) == 0)
2496 return PyInt_FromLong(0);
2497
Guido van Rossum4c08d552000-03-10 22:55:18 +00002498 e = p + PyString_GET_SIZE(self);
2499 cased = 0;
2500 for (; p < e; p++) {
2501 if (islower(*p))
2502 return PyInt_FromLong(0);
2503 else if (!cased && isupper(*p))
2504 cased = 1;
2505 }
2506 return PyInt_FromLong(cased);
2507}
2508
2509
2510static char istitle__doc__[] =
2511"S.istitle() -> int\n\
2512\n\
2513Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2514may only follow uncased characters and lowercase characters only cased\n\
2515ones. Return 0 otherwise.";
2516
2517static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002518string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002519{
Fred Drakeba096332000-07-09 07:04:36 +00002520 register const unsigned char *p
2521 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002522 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002523 int cased, previous_is_cased;
2524
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525 /* Shortcut for single character strings */
2526 if (PyString_GET_SIZE(self) == 1)
2527 return PyInt_FromLong(isupper(*p) != 0);
2528
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002529 /* Special case for empty strings */
2530 if (PyString_GET_SIZE(self) == 0)
2531 return PyInt_FromLong(0);
2532
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533 e = p + PyString_GET_SIZE(self);
2534 cased = 0;
2535 previous_is_cased = 0;
2536 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002537 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002538
2539 if (isupper(ch)) {
2540 if (previous_is_cased)
2541 return PyInt_FromLong(0);
2542 previous_is_cased = 1;
2543 cased = 1;
2544 }
2545 else if (islower(ch)) {
2546 if (!previous_is_cased)
2547 return PyInt_FromLong(0);
2548 previous_is_cased = 1;
2549 cased = 1;
2550 }
2551 else
2552 previous_is_cased = 0;
2553 }
2554 return PyInt_FromLong(cased);
2555}
2556
2557
/* Docstring exposed as str.splitlines.__doc__.
 * The signature line uses balanced brackets to mark `keepends` as the
 * single optional argument. */
static char splitlines__doc__[] =
	"S.splitlines([keepends]) -> list of strings\n"
	"\n"
	"Return a list of the lines in S, breaking at line boundaries.\n"
	"Line breaks are not included in the resulting list unless keepends\n"
	"is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002564
2565#define SPLIT_APPEND(data, left, right) \
2566 str = PyString_FromStringAndSize(data + left, right - left); \
2567 if (!str) \
2568 goto onError; \
2569 if (PyList_Append(list, str)) { \
2570 Py_DECREF(str); \
2571 goto onError; \
2572 } \
2573 else \
2574 Py_DECREF(str);
2575
2576static PyObject*
2577string_splitlines(PyStringObject *self, PyObject *args)
2578{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002579 register int i;
2580 register int j;
2581 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002582 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002583 PyObject *list;
2584 PyObject *str;
2585 char *data;
2586
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002587 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002588 return NULL;
2589
2590 data = PyString_AS_STRING(self);
2591 len = PyString_GET_SIZE(self);
2592
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 list = PyList_New(0);
2594 if (!list)
2595 goto onError;
2596
2597 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002598 int eol;
2599
Guido van Rossum4c08d552000-03-10 22:55:18 +00002600 /* Find a line and append it */
2601 while (i < len && data[i] != '\n' && data[i] != '\r')
2602 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002603
2604 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002605 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606 if (i < len) {
2607 if (data[i] == '\r' && i + 1 < len &&
2608 data[i+1] == '\n')
2609 i += 2;
2610 else
2611 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002612 if (keepends)
2613 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002615 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002616 j = i;
2617 }
2618 if (j < len) {
2619 SPLIT_APPEND(data, j, len);
2620 }
2621
2622 return list;
2623
2624 onError:
2625 Py_DECREF(list);
2626 return NULL;
2627}
2628
2629#undef SPLIT_APPEND
2630
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002631
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002632static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002633string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002634 /* Counterparts of the obsolete stropmodule functions; except
2635 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002636 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2637 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2638 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2639 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2640 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2641 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2642 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2643 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2644 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2645 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2646 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2647 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2648 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2649 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2650 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2651 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2652 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2653 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2654 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2655 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2656 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2657 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2658 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2659 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2660 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2661 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2662 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2663 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2664 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2665 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2666 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2667 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2668 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002670 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002671#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002672 {NULL, NULL} /* sentinel */
2673};
2674
2675static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002676string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002677{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002678 PyObject *x = NULL;
2679 static char *kwlist[] = {"object", 0};
2680
2681 assert(type == &PyString_Type);
2682 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2683 return NULL;
2684 if (x == NULL)
2685 return PyString_FromString("");
2686 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002687}
2688
/* Docstring of the str type itself (installed as tp_doc). */
static char string_doc[] =
	"str(object) -> string\n"
	"\n"
	"Return a nice string representation of the object.\n"
	"If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002694
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002695PyTypeObject PyString_Type = {
2696 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002697 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002698 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002699 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002700 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002701 (destructor)string_dealloc, /* tp_dealloc */
2702 (printfunc)string_print, /* tp_print */
2703 0, /* tp_getattr */
2704 0, /* tp_setattr */
2705 0, /* tp_compare */
2706 (reprfunc)string_repr, /* tp_repr */
2707 0, /* tp_as_number */
2708 &string_as_sequence, /* tp_as_sequence */
2709 0, /* tp_as_mapping */
2710 (hashfunc)string_hash, /* tp_hash */
2711 0, /* tp_call */
2712 (reprfunc)string_str, /* tp_str */
2713 PyObject_GenericGetAttr, /* tp_getattro */
2714 0, /* tp_setattro */
2715 &string_as_buffer, /* tp_as_buffer */
2716 Py_TPFLAGS_DEFAULT, /* tp_flags */
2717 string_doc, /* tp_doc */
2718 0, /* tp_traverse */
2719 0, /* tp_clear */
2720 (richcmpfunc)string_richcompare, /* tp_richcompare */
2721 0, /* tp_weaklistoffset */
2722 0, /* tp_iter */
2723 0, /* tp_iternext */
2724 string_methods, /* tp_methods */
2725 0, /* tp_members */
2726 0, /* tp_getset */
2727 0, /* tp_base */
2728 0, /* tp_dict */
2729 0, /* tp_descr_get */
2730 0, /* tp_descr_set */
2731 0, /* tp_dictoffset */
2732 0, /* tp_init */
2733 0, /* tp_alloc */
2734 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002735};
2736
2737void
Fred Drakeba096332000-07-09 07:04:36 +00002738PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002739{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002740 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002741 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002742 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002743 if (w == NULL || !PyString_Check(*pv)) {
2744 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002745 *pv = NULL;
2746 return;
2747 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002748 v = string_concat((PyStringObject *) *pv, w);
2749 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002750 *pv = v;
2751}
2752
Guido van Rossum013142a1994-08-30 08:19:36 +00002753void
Fred Drakeba096332000-07-09 07:04:36 +00002754PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002755{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 PyString_Concat(pv, w);
2757 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002758}
2759
2760
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002761/* The following function breaks the notion that strings are immutable:
2762 it changes the size of a string. We get away with this only if there
2763 is only one module referencing the object. You can also think of it
2764 as creating a new string object and destroying the old one, only
2765 more efficiently. In any case, don't use this if the string may
2766 already be known to some other part of the code... */
2767
2768int
Fred Drakeba096332000-07-09 07:04:36 +00002769_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002770{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002771 register PyObject *v;
2772 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002773 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002774 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002775 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002776 Py_DECREF(v);
2777 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002778 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002779 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002780 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002781#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002782 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002783#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002784 _Py_ForgetReference(v);
2785 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002786 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002787 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002788 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002789 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002791 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002792 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002793 _Py_NewReference(*pv);
2794 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002795 sv->ob_size = newsize;
2796 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002797 return 0;
2798}
Guido van Rossume5372401993-03-16 12:15:04 +00002799
2800/* Helpers for formatstring */
2801
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002802static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002803getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002804{
2805 int argidx = *p_argidx;
2806 if (argidx < arglen) {
2807 (*p_argidx)++;
2808 if (arglen < 0)
2809 return args;
2810 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002812 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002813 PyErr_SetString(PyExc_TypeError,
2814 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002815 return NULL;
2816}
2817
/* Bit flags recording which flag characters appeared in a format
 * specifier; each flag occupies its own bit so they can be OR-ed. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
2830
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002831static int
Fred Drakeba096332000-07-09 07:04:36 +00002832formatfloat(char *buf, size_t buflen, int flags,
2833 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002834{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002835 /* fmt = '%#.' + `prec` + `type`
2836 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002837 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002838 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002839 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002840 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002841 if (prec < 0)
2842 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002843 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2844 type = 'g';
2845 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002846 /* worst case length calc to ensure no buffer overrun:
2847 fmt = %#.<prec>g
2848 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002849 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002850 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2851 If prec=0 the effective precision is 1 (the leading digit is
2852 always given), therefore increase by one to 10+prec. */
2853 if (buflen <= (size_t)10 + (size_t)prec) {
2854 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002855 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002856 return -1;
2857 }
Guido van Rossume5372401993-03-16 12:15:04 +00002858 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002859 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002860}
2861
Tim Peters38fd5b62000-09-21 05:43:11 +00002862/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2863 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2864 * Python's regular ints.
2865 * Return value: a new PyString*, or NULL if error.
2866 * . *pbuf is set to point into it,
2867 * *plen set to the # of chars following that.
2868 * Caller must decref it when done using pbuf.
2869 * The string starting at *pbuf is of the form
2870 * "-"? ("0x" | "0X")? digit+
2871 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002872 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002873 * There will be at least prec digits, zero-filled on the left if
2874 * necessary to get that many.
2875 * val object to be converted
2876 * flags bitmask of format flags; only F_ALT is looked at
2877 * prec minimum number of digits; 0-fill on left if needed
2878 * type a character in [duoxX]; u acts the same as d
2879 *
2880 * CAUTION: o, x and X conversions on regular ints can never
2881 * produce a '-' sign, but can for Python's unbounded ints.
2882 */
2883PyObject*
2884_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2885 char **pbuf, int *plen)
2886{
2887 PyObject *result = NULL;
2888 char *buf;
2889 int i;
2890 int sign; /* 1 if '-', else 0 */
2891 int len; /* number of characters */
2892 int numdigits; /* len == numnondigits + numdigits */
2893 int numnondigits = 0;
2894
2895 switch (type) {
2896 case 'd':
2897 case 'u':
2898 result = val->ob_type->tp_str(val);
2899 break;
2900 case 'o':
2901 result = val->ob_type->tp_as_number->nb_oct(val);
2902 break;
2903 case 'x':
2904 case 'X':
2905 numnondigits = 2;
2906 result = val->ob_type->tp_as_number->nb_hex(val);
2907 break;
2908 default:
2909 assert(!"'type' not in [duoxX]");
2910 }
2911 if (!result)
2912 return NULL;
2913
2914 /* To modify the string in-place, there can only be one reference. */
2915 if (result->ob_refcnt != 1) {
2916 PyErr_BadInternalCall();
2917 return NULL;
2918 }
2919 buf = PyString_AsString(result);
2920 len = PyString_Size(result);
2921 if (buf[len-1] == 'L') {
2922 --len;
2923 buf[len] = '\0';
2924 }
2925 sign = buf[0] == '-';
2926 numnondigits += sign;
2927 numdigits = len - numnondigits;
2928 assert(numdigits > 0);
2929
Tim Petersfff53252001-04-12 18:38:48 +00002930 /* Get rid of base marker unless F_ALT */
2931 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002932 /* Need to skip 0x, 0X or 0. */
2933 int skipped = 0;
2934 switch (type) {
2935 case 'o':
2936 assert(buf[sign] == '0');
2937 /* If 0 is only digit, leave it alone. */
2938 if (numdigits > 1) {
2939 skipped = 1;
2940 --numdigits;
2941 }
2942 break;
2943 case 'x':
2944 case 'X':
2945 assert(buf[sign] == '0');
2946 assert(buf[sign + 1] == 'x');
2947 skipped = 2;
2948 numnondigits -= 2;
2949 break;
2950 }
2951 if (skipped) {
2952 buf += skipped;
2953 len -= skipped;
2954 if (sign)
2955 buf[0] = '-';
2956 }
2957 assert(len == numnondigits + numdigits);
2958 assert(numdigits > 0);
2959 }
2960
2961 /* Fill with leading zeroes to meet minimum width. */
2962 if (prec > numdigits) {
2963 PyObject *r1 = PyString_FromStringAndSize(NULL,
2964 numnondigits + prec);
2965 char *b1;
2966 if (!r1) {
2967 Py_DECREF(result);
2968 return NULL;
2969 }
2970 b1 = PyString_AS_STRING(r1);
2971 for (i = 0; i < numnondigits; ++i)
2972 *b1++ = *buf++;
2973 for (i = 0; i < prec - numdigits; i++)
2974 *b1++ = '0';
2975 for (i = 0; i < numdigits; i++)
2976 *b1++ = *buf++;
2977 *b1 = '\0';
2978 Py_DECREF(result);
2979 result = r1;
2980 buf = PyString_AS_STRING(result);
2981 len = numnondigits + prec;
2982 }
2983
2984 /* Fix up case for hex conversions. */
2985 switch (type) {
2986 case 'x':
2987 /* Need to convert all upper case letters to lower case. */
2988 for (i = 0; i < len; i++)
2989 if (buf[i] >= 'A' && buf[i] <= 'F')
2990 buf[i] += 'a'-'A';
2991 break;
2992 case 'X':
2993 /* Need to convert 0x to 0X (and -0x to -0X). */
2994 if (buf[sign + 1] == 'x')
2995 buf[sign + 1] = 'X';
2996 break;
2997 }
2998 *pbuf = buf;
2999 *plen = len;
3000 return result;
3001}
3002
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003003static int
Fred Drakeba096332000-07-09 07:04:36 +00003004formatint(char *buf, size_t buflen, int flags,
3005 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003006{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003007 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003008 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3009 + 1 + 1 = 24 */
3010 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003011 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003012 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003013 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003014 if (prec < 0)
3015 prec = 1;
3016 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003017 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003018 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003019 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003020 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003021 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003022 return -1;
3023 }
Guido van Rossume5372401993-03-16 12:15:04 +00003024 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003025 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3026 * but we want it (for consistency with other %#x conversions, and
3027 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003028 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3029 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3030 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003031 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003032 if (x == 0 &&
3033 (flags & F_ALT) &&
3034 (type == 'x' || type == 'X') &&
3035 buf[1] != (char)type) /* this last always true under std C */
3036 {
Tim Petersfff53252001-04-12 18:38:48 +00003037 memmove(buf+2, buf, strlen(buf) + 1);
3038 buf[0] = '0';
3039 buf[1] = (char)type;
3040 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003041 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003042}
3043
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003044static int
Fred Drakeba096332000-07-09 07:04:36 +00003045formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003046{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003047 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003048 if (PyString_Check(v)) {
3049 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003050 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003051 }
3052 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003053 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003054 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003055 }
3056 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003057 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003058}
3059
Guido van Rossum013142a1994-08-30 08:19:36 +00003060
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine bounds-checks against it so that the buffer cannot overflow; a
   better solution may be to malloc a buffer of the appropriate size for
   each format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003070
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003071PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003072PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003073{
3074 char *fmt, *res;
3075 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003076 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003077 PyObject *result, *orig_args;
3078#ifdef Py_USING_UNICODE
3079 PyObject *v, *w;
3080#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003081 PyObject *dict = NULL;
3082 if (format == NULL || !PyString_Check(format) || args == NULL) {
3083 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003084 return NULL;
3085 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003086 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003087 fmt = PyString_AsString(format);
3088 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003089 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003090 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003091 if (result == NULL)
3092 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003093 res = PyString_AsString(result);
3094 if (PyTuple_Check(args)) {
3095 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003096 argidx = 0;
3097 }
3098 else {
3099 arglen = -1;
3100 argidx = -2;
3101 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003102 if (args->ob_type->tp_as_mapping)
3103 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003104 while (--fmtcnt >= 0) {
3105 if (*fmt != '%') {
3106 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003107 rescnt = fmtcnt + 100;
3108 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003109 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003110 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003111 res = PyString_AsString(result)
3112 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003113 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003114 }
3115 *res++ = *fmt++;
3116 }
3117 else {
3118 /* Got a format specifier */
3119 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003120 int width = -1;
3121 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003122 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003123 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003124 PyObject *v = NULL;
3125 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003126 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003127 int sign;
3128 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003129 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003130#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003131 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003132 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003133#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003134
Guido van Rossumda9c2711996-12-05 21:58:58 +00003135 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003136 if (*fmt == '(') {
3137 char *keystart;
3138 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003139 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003140 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003141
3142 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003143 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003144 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003145 goto error;
3146 }
3147 ++fmt;
3148 --fmtcnt;
3149 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003150 /* Skip over balanced parentheses */
3151 while (pcount > 0 && --fmtcnt >= 0) {
3152 if (*fmt == ')')
3153 --pcount;
3154 else if (*fmt == '(')
3155 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003156 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003157 }
3158 keylen = fmt - keystart - 1;
3159 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003160 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003161 "incomplete format key");
3162 goto error;
3163 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003164 key = PyString_FromStringAndSize(keystart,
3165 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003166 if (key == NULL)
3167 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003168 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003169 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003170 args_owned = 0;
3171 }
3172 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003173 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003174 if (args == NULL) {
3175 goto error;
3176 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003177 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003178 arglen = -1;
3179 argidx = -2;
3180 }
Guido van Rossume5372401993-03-16 12:15:04 +00003181 while (--fmtcnt >= 0) {
3182 switch (c = *fmt++) {
3183 case '-': flags |= F_LJUST; continue;
3184 case '+': flags |= F_SIGN; continue;
3185 case ' ': flags |= F_BLANK; continue;
3186 case '#': flags |= F_ALT; continue;
3187 case '0': flags |= F_ZERO; continue;
3188 }
3189 break;
3190 }
3191 if (c == '*') {
3192 v = getnextarg(args, arglen, &argidx);
3193 if (v == NULL)
3194 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003195 if (!PyInt_Check(v)) {
3196 PyErr_SetString(PyExc_TypeError,
3197 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003198 goto error;
3199 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003200 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003201 if (width < 0) {
3202 flags |= F_LJUST;
3203 width = -width;
3204 }
Guido van Rossume5372401993-03-16 12:15:04 +00003205 if (--fmtcnt >= 0)
3206 c = *fmt++;
3207 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003208 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003209 width = c - '0';
3210 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003211 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003212 if (!isdigit(c))
3213 break;
3214 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003215 PyErr_SetString(
3216 PyExc_ValueError,
3217 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003218 goto error;
3219 }
3220 width = width*10 + (c - '0');
3221 }
3222 }
3223 if (c == '.') {
3224 prec = 0;
3225 if (--fmtcnt >= 0)
3226 c = *fmt++;
3227 if (c == '*') {
3228 v = getnextarg(args, arglen, &argidx);
3229 if (v == NULL)
3230 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003231 if (!PyInt_Check(v)) {
3232 PyErr_SetString(
3233 PyExc_TypeError,
3234 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003235 goto error;
3236 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003237 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003238 if (prec < 0)
3239 prec = 0;
3240 if (--fmtcnt >= 0)
3241 c = *fmt++;
3242 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003243 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003244 prec = c - '0';
3245 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003246 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003247 if (!isdigit(c))
3248 break;
3249 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003250 PyErr_SetString(
3251 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003252 "prec too big");
3253 goto error;
3254 }
3255 prec = prec*10 + (c - '0');
3256 }
3257 }
3258 } /* prec */
3259 if (fmtcnt >= 0) {
3260 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003261 if (--fmtcnt >= 0)
3262 c = *fmt++;
3263 }
3264 }
3265 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003266 PyErr_SetString(PyExc_ValueError,
3267 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003268 goto error;
3269 }
3270 if (c != '%') {
3271 v = getnextarg(args, arglen, &argidx);
3272 if (v == NULL)
3273 goto error;
3274 }
3275 sign = 0;
3276 fill = ' ';
3277 switch (c) {
3278 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003279 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003280 len = 1;
3281 break;
3282 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003283 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003284#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003285 if (PyUnicode_Check(v)) {
3286 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003287 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003288 goto unicode;
3289 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003290#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003291 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003292 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003293 else
3294 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003295 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003296 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003297 if (!PyString_Check(temp)) {
3298 PyErr_SetString(PyExc_TypeError,
3299 "%s argument has non-string str()");
3300 goto error;
3301 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003302 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003304 if (prec >= 0 && len > prec)
3305 len = prec;
3306 break;
3307 case 'i':
3308 case 'd':
3309 case 'u':
3310 case 'o':
3311 case 'x':
3312 case 'X':
3313 if (c == 'i')
3314 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003315 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003316 temp = _PyString_FormatLong(v, flags,
3317 prec, c, &pbuf, &len);
3318 if (!temp)
3319 goto error;
3320 /* unbounded ints can always produce
3321 a sign character! */
3322 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003323 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003324 else {
3325 pbuf = formatbuf;
3326 len = formatint(pbuf, sizeof(formatbuf),
3327 flags, prec, c, v);
3328 if (len < 0)
3329 goto error;
3330 /* only d conversion is signed */
3331 sign = c == 'd';
3332 }
3333 if (flags & F_ZERO)
3334 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003335 break;
3336 case 'e':
3337 case 'E':
3338 case 'f':
3339 case 'g':
3340 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003341 pbuf = formatbuf;
3342 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003343 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003344 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003345 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003346 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003347 fill = '0';
3348 break;
3349 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003350 pbuf = formatbuf;
3351 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003352 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003353 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003354 break;
3355 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003356 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003357 "unsupported format character '%c' (0x%x) "
3358 "at index %i",
3359 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003360 goto error;
3361 }
3362 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003363 if (*pbuf == '-' || *pbuf == '+') {
3364 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003365 len--;
3366 }
3367 else if (flags & F_SIGN)
3368 sign = '+';
3369 else if (flags & F_BLANK)
3370 sign = ' ';
3371 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003372 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003373 }
3374 if (width < len)
3375 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003376 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003377 reslen -= rescnt;
3378 rescnt = width + fmtcnt + 100;
3379 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003380 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003381 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003382 res = PyString_AsString(result)
3383 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003384 }
3385 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003386 if (fill != ' ')
3387 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003388 rescnt--;
3389 if (width > len)
3390 width--;
3391 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003392 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3393 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003394 assert(pbuf[1] == c);
3395 if (fill != ' ') {
3396 *res++ = *pbuf++;
3397 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003398 }
Tim Petersfff53252001-04-12 18:38:48 +00003399 rescnt -= 2;
3400 width -= 2;
3401 if (width < 0)
3402 width = 0;
3403 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003404 }
3405 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003406 do {
3407 --rescnt;
3408 *res++ = fill;
3409 } while (--width > len);
3410 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003411 if (fill == ' ') {
3412 if (sign)
3413 *res++ = sign;
3414 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003415 (c == 'x' || c == 'X')) {
3416 assert(pbuf[0] == '0');
3417 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003418 *res++ = *pbuf++;
3419 *res++ = *pbuf++;
3420 }
3421 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003422 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003423 res += len;
3424 rescnt -= len;
3425 while (--width >= len) {
3426 --rescnt;
3427 *res++ = ' ';
3428 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003429 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003430 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003431 "not all arguments converted");
3432 goto error;
3433 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003434 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003435 } /* '%' */
3436 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003437 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003438 PyErr_SetString(PyExc_TypeError,
3439 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003440 goto error;
3441 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003442 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003443 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003444 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003446 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003447
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003448#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003449 unicode:
3450 if (args_owned) {
3451 Py_DECREF(args);
3452 args_owned = 0;
3453 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003454 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003455 if (PyTuple_Check(orig_args) && argidx > 0) {
3456 PyObject *v;
3457 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3458 v = PyTuple_New(n);
3459 if (v == NULL)
3460 goto error;
3461 while (--n >= 0) {
3462 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3463 Py_INCREF(w);
3464 PyTuple_SET_ITEM(v, n, w);
3465 }
3466 args = v;
3467 } else {
3468 Py_INCREF(orig_args);
3469 args = orig_args;
3470 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003471 args_owned = 1;
3472 /* Take what we have of the result and let the Unicode formatting
3473 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003474 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003475 if (_PyString_Resize(&result, rescnt))
3476 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003477 fmtcnt = PyString_GET_SIZE(format) - \
3478 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003479 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3480 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003481 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003482 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003483 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003484 if (v == NULL)
3485 goto error;
3486 /* Paste what we have (result) to what the Unicode formatting
3487 function returned (v) and return the result (or error) */
3488 w = PyUnicode_Concat(result, v);
3489 Py_DECREF(result);
3490 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003491 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003492 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003493#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003494
Guido van Rossume5372401993-03-16 12:15:04 +00003495 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003496 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003497 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003498 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003499 }
Guido van Rossume5372401993-03-16 12:15:04 +00003500 return NULL;
3501}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003502
3503
3504#ifdef INTERN_STRINGS
3505
Barry Warsaw4df762f2000-08-16 23:41:01 +00003506/* This dictionary will leak at PyString_Fini() time. That's acceptable
3507 * because PyString_Fini() specifically frees interned strings that are
3508 * only referenced by this dictionary. The CVS log entry for revision 2.45
3509 * says:
3510 *
3511 * Change the Fini function to only remove otherwise unreferenced
3512 * strings from the interned table. There are references in
3513 * hard-to-find static variables all over the interpreter, and it's not
3514 * worth trying to get rid of all those; but "uninterning" isn't fair
3515 * either and may cause subtle failures later -- so we have to keep them
3516 * in the interned table.
3517 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003518static PyObject *interned;
3519
3520void
Fred Drakeba096332000-07-09 07:04:36 +00003521PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003522{
3523 register PyStringObject *s = (PyStringObject *)(*p);
3524 PyObject *t;
3525 if (s == NULL || !PyString_Check(s))
3526 Py_FatalError("PyString_InternInPlace: strings only please!");
3527 if ((t = s->ob_sinterned) != NULL) {
3528 if (t == (PyObject *)s)
3529 return;
3530 Py_INCREF(t);
3531 *p = t;
3532 Py_DECREF(s);
3533 return;
3534 }
3535 if (interned == NULL) {
3536 interned = PyDict_New();
3537 if (interned == NULL)
3538 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003539 }
3540 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3541 Py_INCREF(t);
3542 *p = s->ob_sinterned = t;
3543 Py_DECREF(s);
3544 return;
3545 }
3546 t = (PyObject *)s;
3547 if (PyDict_SetItem(interned, t, t) == 0) {
3548 s->ob_sinterned = t;
3549 return;
3550 }
3551 PyErr_Clear();
3552}
3553
3554
3555PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003556PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003557{
3558 PyObject *s = PyString_FromString(cp);
3559 if (s == NULL)
3560 return NULL;
3561 PyString_InternInPlace(&s);
3562 return s;
3563}
3564
3565#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003566
3567void
Fred Drakeba096332000-07-09 07:04:36 +00003568PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003569{
3570 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003571 for (i = 0; i < UCHAR_MAX + 1; i++) {
3572 Py_XDECREF(characters[i]);
3573 characters[i] = NULL;
3574 }
3575#ifndef DONT_SHARE_SHORT_STRINGS
3576 Py_XDECREF(nullstring);
3577 nullstring = NULL;
3578#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003579#ifdef INTERN_STRINGS
3580 if (interned) {
3581 int pos, changed;
3582 PyObject *key, *value;
3583 do {
3584 changed = 0;
3585 pos = 0;
3586 while (PyDict_Next(interned, &pos, &key, &value)) {
3587 if (key->ob_refcnt == 2 && key == value) {
3588 PyDict_DelItem(interned, key);
3589 changed = 1;
3590 }
3591 }
3592 } while (changed);
3593 }
3594#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003595}
Barry Warsawa903ad982001-02-23 16:40:48 +00003596
3597#ifdef INTERN_STRINGS
3598void _Py_ReleaseInternedStrings(void)
3599{
3600 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003601 fprintf(stderr, "releasing interned strings\n");
3602 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003603 Py_DECREF(interned);
3604 interned = NULL;
3605 }
3606}
3607#endif /* INTERN_STRINGS */