blob: cfa5f536345886d37e0e681bf57df0415520fda2 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
22 Newsizedstringobject() and newstringobject() try in certain cases
23 to share string objects. When the size of the string is zero,
24 these routines always return a pointer to the same string object;
25 when the size is one, they return a pointer to an already existing
26 object if the contents of the string is known. For
27 newstringobject() this is always the case, for
28 newsizedstringobject() this is the case when the first argument in
29 not NULL.
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the obect was gotten from
33 newsizedstringobject() with a NULL first argument, because in the
34 future these routines may try to do even more sharing of objects.
35*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
164 ;
165
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
172
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
195 */
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
207 }
208 } else
209 n++;
210 }
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
216
217 s = PyString_AsString(string);
218
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 }
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
241 }
242
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000272 /* %p is ill-defined: ensure leading 0x. */
273 if (s[1] == 'X')
274 s[1] = 'x';
275 else if (s[1] != 'x') {
276 memmove(s+2, s, strlen(s)+1);
277 s[0] = '0';
278 s[1] = 'x';
279 }
Barry Warsawdadace02001-08-24 18:32:06 +0000280 s += strlen(s);
281 break;
282 case '%':
283 *s++ = '%';
284 break;
285 default:
286 strcpy(s, p);
287 s += strlen(s);
288 goto end;
289 }
290 } else
291 *s++ = *f;
292 }
293
294 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000295 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000296 return string;
297}
298
299PyObject *
300PyString_FromFormat(const char *format, ...)
301{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000302 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000303 va_list vargs;
304
305#ifdef HAVE_STDARG_PROTOTYPES
306 va_start(vargs, format);
307#else
308 va_start(vargs);
309#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 ret = PyString_FromFormatV(format, vargs);
311 va_end(vargs);
312 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313}
314
315
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000316PyObject *PyString_Decode(const char *s,
317 int size,
318 const char *encoding,
319 const char *errors)
320{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000321 PyObject *v, *str;
322
323 str = PyString_FromStringAndSize(s, size);
324 if (str == NULL)
325 return NULL;
326 v = PyString_AsDecodedString(str, encoding, errors);
327 Py_DECREF(str);
328 return v;
329}
330
331PyObject *PyString_AsDecodedObject(PyObject *str,
332 const char *encoding,
333 const char *errors)
334{
335 PyObject *v;
336
337 if (!PyString_Check(str)) {
338 PyErr_BadArgument();
339 goto onError;
340 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000341
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000342 if (encoding == NULL) {
343#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000344 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000345#else
346 PyErr_SetString(PyExc_ValueError, "no encoding specified");
347 goto onError;
348#endif
349 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350
351 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000352 v = PyCodec_Decode(str, encoding, errors);
353 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000355
356 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000359 return NULL;
360}
361
362PyObject *PyString_AsDecodedString(PyObject *str,
363 const char *encoding,
364 const char *errors)
365{
366 PyObject *v;
367
368 v = PyString_AsDecodedObject(str, encoding, errors);
369 if (v == NULL)
370 goto onError;
371
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000372#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 /* Convert Unicode to a string using the default encoding */
374 if (PyUnicode_Check(v)) {
375 PyObject *temp = v;
376 v = PyUnicode_AsEncodedString(v, NULL, NULL);
377 Py_DECREF(temp);
378 if (v == NULL)
379 goto onError;
380 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000382 if (!PyString_Check(v)) {
383 PyErr_Format(PyExc_TypeError,
384 "decoder did not return a string object (type=%.400s)",
385 v->ob_type->tp_name);
386 Py_DECREF(v);
387 goto onError;
388 }
389
390 return v;
391
392 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 return NULL;
394}
395
396PyObject *PyString_Encode(const char *s,
397 int size,
398 const char *encoding,
399 const char *errors)
400{
401 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 str = PyString_FromStringAndSize(s, size);
404 if (str == NULL)
405 return NULL;
406 v = PyString_AsEncodedString(str, encoding, errors);
407 Py_DECREF(str);
408 return v;
409}
410
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000412 const char *encoding,
413 const char *errors)
414{
415 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000416
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 if (!PyString_Check(str)) {
418 PyErr_BadArgument();
419 goto onError;
420 }
421
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422 if (encoding == NULL) {
423#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000425#else
426 PyErr_SetString(PyExc_ValueError, "no encoding specified");
427 goto onError;
428#endif
429 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430
431 /* Encode via the codec registry */
432 v = PyCodec_Encode(str, encoding, errors);
433 if (v == NULL)
434 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000435
436 return v;
437
438 onError:
439 return NULL;
440}
441
442PyObject *PyString_AsEncodedString(PyObject *str,
443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v;
447
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000448 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449 if (v == NULL)
450 goto onError;
451
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 /* Convert Unicode to a string using the default encoding */
454 if (PyUnicode_Check(v)) {
455 PyObject *temp = v;
456 v = PyUnicode_AsEncodedString(v, NULL, NULL);
457 Py_DECREF(temp);
458 if (v == NULL)
459 goto onError;
460 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(v)) {
463 PyErr_Format(PyExc_TypeError,
464 "encoder did not return a string object (type=%.400s)",
465 v->ob_type->tp_name);
466 Py_DECREF(v);
467 goto onError;
468 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000469
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000471
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 onError:
473 return NULL;
474}
475
Guido van Rossum234f9421993-06-17 12:35:49 +0000476static void
Fred Drakeba096332000-07-09 07:04:36 +0000477string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000478{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000479 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000480}
481
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000482static int
483string_getsize(register PyObject *op)
484{
485 char *s;
486 int len;
487 if (PyString_AsStringAndSize(op, &s, &len))
488 return -1;
489 return len;
490}
491
492static /*const*/ char *
493string_getbuffer(register PyObject *op)
494{
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return NULL;
499 return s;
500}
501
Guido van Rossumd7047b31995-01-02 19:07:15 +0000502int
Fred Drakeba096332000-07-09 07:04:36 +0000503PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000505 if (!PyString_Check(op))
506 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508}
509
510/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000511PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000512{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000513 if (!PyString_Check(op))
514 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516}
517
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518int
519PyString_AsStringAndSize(register PyObject *obj,
520 register char **s,
521 register int *len)
522{
523 if (s == NULL) {
524 PyErr_BadInternalCall();
525 return -1;
526 }
527
528 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000529#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530 if (PyUnicode_Check(obj)) {
531 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
532 if (obj == NULL)
533 return -1;
534 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535 else
536#endif
537 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 PyErr_Format(PyExc_TypeError,
539 "expected string or Unicode object, "
540 "%.200s found", obj->ob_type->tp_name);
541 return -1;
542 }
543 }
544
545 *s = PyString_AS_STRING(obj);
546 if (len != NULL)
547 *len = PyString_GET_SIZE(obj);
548 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
549 PyErr_SetString(PyExc_TypeError,
550 "expected string without null bytes");
551 return -1;
552 }
553 return 0;
554}
555
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556/* Methods */
557
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000560{
561 int i;
562 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000563 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000567 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569
Thomas Wouters7e474022000-07-16 12:04:32 +0000570 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000571 quote = '\'';
572 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
573 quote = '"';
574
575 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576 for (i = 0; i < op->ob_size; i++) {
577 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000578 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000580 else if (c == '\t')
581 fprintf(fp, "\\t");
582 else if (c == '\n')
583 fprintf(fp, "\\n");
584 else if (c == '\r')
585 fprintf(fp, "\\r");
586 else if (c < ' ' || c >= 0x7f)
587 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000589 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593}
594
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000596string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000598 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
599 PyObject *v;
600 if (newsize > INT_MAX) {
601 PyErr_SetString(PyExc_OverflowError,
602 "string is too large to make repr");
603 }
604 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000606 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 }
608 else {
609 register int i;
610 register char c;
611 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000612 int quote;
613
Thomas Wouters7e474022000-07-16 12:04:32 +0000614 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000615 quote = '\'';
616 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
617 quote = '"';
618
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000619 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 for (i = 0; i < op->ob_size; i++) {
622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
631 else if (c < ' ' || c >= 0x7f) {
632 sprintf(p, "\\x%02x", c & 0xff);
633 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 }
635 else
636 *p++ = c;
637 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000638 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000640 _PyString_Resize(
641 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000642 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644}
645
Guido van Rossum189f1df2001-05-01 16:51:53 +0000646static PyObject *
647string_str(PyObject *s)
648{
649 Py_INCREF(s);
650 return s;
651}
652
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653static int
Fred Drakeba096332000-07-09 07:04:36 +0000654string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655{
656 return a->ob_size;
657}
658
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000660string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661{
662 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000663 register PyStringObject *op;
664 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000665#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000666 if (PyUnicode_Check(bb))
667 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000668#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000669 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000670 "cannot add type \"%.200s\" to string",
671 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672 return NULL;
673 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000676 if ((a->ob_size == 0 || b->ob_size == 0) &&
677 PyString_CheckExact(a) && PyString_CheckExact(b)) {
678 if (a->ob_size == 0) {
679 Py_INCREF(bb);
680 return bb;
681 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000682 Py_INCREF(a);
683 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000684 }
685 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000686 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000687 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000688 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000689 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000690 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000691 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000692#ifdef CACHE_HASH
693 op->ob_shash = -1;
694#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000695#ifdef INTERN_STRINGS
696 op->ob_sinterned = NULL;
697#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000698 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
699 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
700 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702#undef b
703}
704
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000705static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000706string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707{
708 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000709 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000710 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000711 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712 if (n < 0)
713 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
716 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000718 if (n && size / n != a->ob_size) {
719 PyErr_SetString(PyExc_OverflowError,
720 "repeated string is too long");
721 return NULL;
722 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000723 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 Py_INCREF(a);
725 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726 }
Tim Peters8f422462000-09-09 06:13:41 +0000727 nbytes = size * sizeof(char);
728 if (nbytes / sizeof(char) != (size_t)size ||
729 nbytes + sizeof(PyStringObject) <= nbytes) {
730 PyErr_SetString(PyExc_OverflowError,
731 "repeated string is too long");
732 return NULL;
733 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000734 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000735 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000736 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000738 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000739#ifdef CACHE_HASH
740 op->ob_shash = -1;
741#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000742#ifdef INTERN_STRINGS
743 op->ob_sinterned = NULL;
744#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000745 for (i = 0; i < size; i += a->ob_size)
746 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
747 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000748 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749}
750
751/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
752
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000753static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000754string_slice(register PyStringObject *a, register int i, register int j)
755 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756{
757 if (i < 0)
758 i = 0;
759 if (j < 0)
760 j = 0; /* Avoid signed/unsigned bug in next line */
761 if (j > a->ob_size)
762 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000763 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
764 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000765 Py_INCREF(a);
766 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767 }
768 if (j < i)
769 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000770 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771}
772
Guido van Rossum9284a572000-03-07 15:53:43 +0000773static int
Fred Drakeba096332000-07-09 07:04:36 +0000774string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000775{
776 register char *s, *end;
777 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000778#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000779 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000780 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000781#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000782 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000783 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000784 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000785 return -1;
786 }
787 c = PyString_AsString(el)[0];
788 s = PyString_AsString(a);
789 end = s + PyString_Size(a);
790 while (s < end) {
791 if (c == *s++)
792 return 1;
793 }
794 return 0;
795}
796
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000797static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000798string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000800 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000801 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000802 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000803 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804 return NULL;
805 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000806 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000807 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000808 if (v == NULL)
809 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000810 else {
811#ifdef COUNT_ALLOCS
812 one_strings++;
813#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000814 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000815 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000816 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817}
818
Martin v. Löwiscd353062001-05-24 16:56:35 +0000819static PyObject*
820string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000821{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000822 int c;
823 int len_a, len_b;
824 int min_len;
825 PyObject *result;
826
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000827 /* Make sure both arguments are strings. */
828 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000829 result = Py_NotImplemented;
830 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000831 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000832 if (a == b) {
833 switch (op) {
834 case Py_EQ:case Py_LE:case Py_GE:
835 result = Py_True;
836 goto out;
837 case Py_NE:case Py_LT:case Py_GT:
838 result = Py_False;
839 goto out;
840 }
841 }
842 if (op == Py_EQ) {
843 /* Supporting Py_NE here as well does not save
844 much time, since Py_NE is rarely used. */
845 if (a->ob_size == b->ob_size
846 && (a->ob_sval[0] == b->ob_sval[0]
847 && memcmp(a->ob_sval, b->ob_sval,
848 a->ob_size) == 0)) {
849 result = Py_True;
850 } else {
851 result = Py_False;
852 }
853 goto out;
854 }
855 len_a = a->ob_size; len_b = b->ob_size;
856 min_len = (len_a < len_b) ? len_a : len_b;
857 if (min_len > 0) {
858 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
859 if (c==0)
860 c = memcmp(a->ob_sval, b->ob_sval, min_len);
861 }else
862 c = 0;
863 if (c == 0)
864 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
865 switch (op) {
866 case Py_LT: c = c < 0; break;
867 case Py_LE: c = c <= 0; break;
868 case Py_EQ: assert(0); break; /* unreachable */
869 case Py_NE: c = c != 0; break;
870 case Py_GT: c = c > 0; break;
871 case Py_GE: c = c >= 0; break;
872 default:
873 result = Py_NotImplemented;
874 goto out;
875 }
876 result = c ? Py_True : Py_False;
877 out:
878 Py_INCREF(result);
879 return result;
880}
881
882int
883_PyString_Eq(PyObject *o1, PyObject *o2)
884{
885 PyStringObject *a, *b;
886 a = (PyStringObject*)o1;
887 b = (PyStringObject*)o2;
888 return a->ob_size == b->ob_size
889 && *a->ob_sval == *b->ob_sval
890 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891}
892
Guido van Rossum9bfef441993-03-29 10:43:31 +0000893static long
Fred Drakeba096332000-07-09 07:04:36 +0000894string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000895{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000896 register int len;
897 register unsigned char *p;
898 register long x;
899
900#ifdef CACHE_HASH
901 if (a->ob_shash != -1)
902 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000903#ifdef INTERN_STRINGS
904 if (a->ob_sinterned != NULL)
905 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000906 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000907#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000908#endif
909 len = a->ob_size;
910 p = (unsigned char *) a->ob_sval;
911 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000912 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000913 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000914 x ^= a->ob_size;
915 if (x == -1)
916 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000917#ifdef CACHE_HASH
918 a->ob_shash = x;
919#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000920 return x;
921}
922
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000923static int
Fred Drakeba096332000-07-09 07:04:36 +0000924string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000925{
926 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000927 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000928 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000929 return -1;
930 }
931 *ptr = (void *)self->ob_sval;
932 return self->ob_size;
933}
934
935static int
Fred Drakeba096332000-07-09 07:04:36 +0000936string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000937{
Guido van Rossum045e6881997-09-08 18:30:11 +0000938 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000939 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000940 return -1;
941}
942
943static int
Fred Drakeba096332000-07-09 07:04:36 +0000944string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000945{
946 if ( lenp )
947 *lenp = self->ob_size;
948 return 1;
949}
950
Guido van Rossum1db70701998-10-08 02:18:52 +0000951static int
Fred Drakeba096332000-07-09 07:04:36 +0000952string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000953{
954 if ( index != 0 ) {
955 PyErr_SetString(PyExc_SystemError,
956 "accessing non-existent string segment");
957 return -1;
958 }
959 *ptr = self->ob_sval;
960 return self->ob_size;
961}
962
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000963static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000964 (inquiry)string_length, /*sq_length*/
965 (binaryfunc)string_concat, /*sq_concat*/
966 (intargfunc)string_repeat, /*sq_repeat*/
967 (intargfunc)string_item, /*sq_item*/
968 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000969 0, /*sq_ass_item*/
970 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000971 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000972};
973
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000974static PyBufferProcs string_as_buffer = {
975 (getreadbufferproc)string_buffer_getreadbuf,
976 (getwritebufferproc)string_buffer_getwritebuf,
977 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000978 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000979};
980
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000981
982
/* Selectors passed to do_strip() to choose which end(s) get trimmed. */
#define LEFTSTRIP 0     /* trim leading whitespace only */
#define RIGHTSTRIP 1    /* trim trailing whitespace only */
#define BOTHSTRIP 2     /* trim both ends */
986
987
988static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000989split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000990{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000991 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000992 PyObject* item;
993 PyObject *list = PyList_New(0);
994
995 if (list == NULL)
996 return NULL;
997
Guido van Rossum4c08d552000-03-10 22:55:18 +0000998 for (i = j = 0; i < len; ) {
999 while (i < len && isspace(Py_CHARMASK(s[i])))
1000 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001001 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001002 while (i < len && !isspace(Py_CHARMASK(s[i])))
1003 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001004 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001005 if (maxsplit-- <= 0)
1006 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001007 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1008 if (item == NULL)
1009 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001010 err = PyList_Append(list, item);
1011 Py_DECREF(item);
1012 if (err < 0)
1013 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001014 while (i < len && isspace(Py_CHARMASK(s[i])))
1015 i++;
1016 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001017 }
1018 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001019 if (j < len) {
1020 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1021 if (item == NULL)
1022 goto finally;
1023 err = PyList_Append(list, item);
1024 Py_DECREF(item);
1025 if (err < 0)
1026 goto finally;
1027 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028 return list;
1029 finally:
1030 Py_DECREF(list);
1031 return NULL;
1032}
1033
1034
/* Docstring text for S.split(). */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001042
1043static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001044string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001045{
1046 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001047 int maxsplit = -1;
1048 const char *s = PyString_AS_STRING(self), *sub;
1049 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050
Guido van Rossum4c08d552000-03-10 22:55:18 +00001051 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 if (maxsplit < 0)
1054 maxsplit = INT_MAX;
1055 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001056 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001057 if (PyString_Check(subobj)) {
1058 sub = PyString_AS_STRING(subobj);
1059 n = PyString_GET_SIZE(subobj);
1060 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001061#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001062 else if (PyUnicode_Check(subobj))
1063 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001064#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001065 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1066 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001067 if (n == 0) {
1068 PyErr_SetString(PyExc_ValueError, "empty separator");
1069 return NULL;
1070 }
1071
1072 list = PyList_New(0);
1073 if (list == NULL)
1074 return NULL;
1075
1076 i = j = 0;
1077 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001078 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001079 if (maxsplit-- <= 0)
1080 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001081 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1082 if (item == NULL)
1083 goto fail;
1084 err = PyList_Append(list, item);
1085 Py_DECREF(item);
1086 if (err < 0)
1087 goto fail;
1088 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001089 }
1090 else
1091 i++;
1092 }
1093 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1094 if (item == NULL)
1095 goto fail;
1096 err = PyList_Append(list, item);
1097 Py_DECREF(item);
1098 if (err < 0)
1099 goto fail;
1100
1101 return list;
1102
1103 fail:
1104 Py_DECREF(list);
1105 return NULL;
1106}
1107
1108
/* Docstring text for S.join(). */
static char join__doc__[] =
    "S.join(sequence) -> string\n"
    "\n"
    "Return a string which is the concatenation of the strings in the\n"
    "sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001114
1115static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001116string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001117{
1118 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001119 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001120 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001121 char *p;
1122 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001123 size_t sz = 0;
1124 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001125 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126
Tim Peters19fe14e2001-01-19 03:03:47 +00001127 seq = PySequence_Fast(orig, "");
1128 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001129 if (PyErr_ExceptionMatches(PyExc_TypeError))
1130 PyErr_Format(PyExc_TypeError,
1131 "sequence expected, %.80s found",
1132 orig->ob_type->tp_name);
1133 return NULL;
1134 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001135
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001136 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001137 if (seqlen == 0) {
1138 Py_DECREF(seq);
1139 return PyString_FromString("");
1140 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001142 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001143 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1144 PyErr_Format(PyExc_TypeError,
1145 "sequence item 0: expected string,"
1146 " %.80s found",
1147 item->ob_type->tp_name);
1148 Py_DECREF(seq);
1149 return NULL;
1150 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001151 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001152 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001153 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001155
Tim Peters19fe14e2001-01-19 03:03:47 +00001156 /* There are at least two things to join. Do a pre-pass to figure out
1157 * the total amount of space we'll need (sz), see whether any argument
1158 * is absurd, and defer to the Unicode join if appropriate.
1159 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001160 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001161 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001162 item = PySequence_Fast_GET_ITEM(seq, i);
1163 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001164#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001165 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001166 /* Defer to Unicode join.
1167 * CAUTION: There's no gurantee that the
1168 * original sequence can be iterated over
1169 * again, so we must pass seq here.
1170 */
1171 PyObject *result;
1172 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001173 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001174 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001175 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001176#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001177 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001178 "sequence item %i: expected string,"
1179 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001180 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001181 Py_DECREF(seq);
1182 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001183 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001184 sz += PyString_GET_SIZE(item);
1185 if (i != 0)
1186 sz += seplen;
1187 if (sz < old_sz || sz > INT_MAX) {
1188 PyErr_SetString(PyExc_OverflowError,
1189 "join() is too long for a Python string");
1190 Py_DECREF(seq);
1191 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001192 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001193 }
1194
1195 /* Allocate result space. */
1196 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1197 if (res == NULL) {
1198 Py_DECREF(seq);
1199 return NULL;
1200 }
1201
1202 /* Catenate everything. */
1203 p = PyString_AS_STRING(res);
1204 for (i = 0; i < seqlen; ++i) {
1205 size_t n;
1206 item = PySequence_Fast_GET_ITEM(seq, i);
1207 n = PyString_GET_SIZE(item);
1208 memcpy(p, PyString_AS_STRING(item), n);
1209 p += n;
1210 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001211 memcpy(p, sep, seplen);
1212 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001213 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001214 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001215
Jeremy Hylton49048292000-07-11 03:28:17 +00001216 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001218}
1219
Tim Peters52e155e2001-06-16 05:42:57 +00001220PyObject *
1221_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001222{
Tim Petersa7259592001-06-16 05:11:17 +00001223 assert(sep != NULL && PyString_Check(sep));
1224 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001225 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001226}
1227
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228static long
Fred Drakeba096332000-07-09 07:04:36 +00001229string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001231 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001232 int len = PyString_GET_SIZE(self);
1233 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001234 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001235
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001236 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001237 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001238 return -2;
1239 if (PyString_Check(subobj)) {
1240 sub = PyString_AS_STRING(subobj);
1241 n = PyString_GET_SIZE(subobj);
1242 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001243#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001244 else if (PyUnicode_Check(subobj))
1245 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001246#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001247 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001248 return -2;
1249
1250 if (last > len)
1251 last = len;
1252 if (last < 0)
1253 last += len;
1254 if (last < 0)
1255 last = 0;
1256 if (i < 0)
1257 i += len;
1258 if (i < 0)
1259 i = 0;
1260
Guido van Rossum4c08d552000-03-10 22:55:18 +00001261 if (dir > 0) {
1262 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001264 last -= n;
1265 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001266 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001267 return (long)i;
1268 }
1269 else {
1270 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001271
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 if (n == 0 && i <= last)
1273 return (long)last;
1274 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001275 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 return (long)j;
1277 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001278
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001279 return -1;
1280}
1281
1282
/* Docstring text for S.find().  The search window is written in slice
 * notation, S[start:end], matching the wording of count__doc__.
 */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1291
1292static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001293string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001294{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001295 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296 if (result == -2)
1297 return NULL;
1298 return PyInt_FromLong(result);
1299}
1300
1301
/* Docstring text for S.index(). */
static char index__doc__[] =
    "S.index(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.find() but raise ValueError when the substring is not found.";
1306
1307static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001308string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001310 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311 if (result == -2)
1312 return NULL;
1313 if (result == -1) {
1314 PyErr_SetString(PyExc_ValueError,
1315 "substring not found in string.index");
1316 return NULL;
1317 }
1318 return PyInt_FromLong(result);
1319}
1320
1321
/* Docstring text for S.rfind().  The search window is written in slice
 * notation, S[start:end], matching the wording of count__doc__.
 */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1330
1331static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001332string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001334 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 if (result == -2)
1336 return NULL;
1337 return PyInt_FromLong(result);
1338}
1339
1340
/* Docstring text for S.rindex(). */
static char rindex__doc__[] =
    "S.rindex(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.rfind() but raise ValueError when the substring is not found.";
1345
1346static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001347string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 if (result == -2)
1351 return NULL;
1352 if (result == -1) {
1353 PyErr_SetString(PyExc_ValueError,
1354 "substring not found in string.rindex");
1355 return NULL;
1356 }
1357 return PyInt_FromLong(result);
1358}
1359
1360
1361static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001362do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363{
1364 char *s = PyString_AS_STRING(self);
1365 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001366
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 i = 0;
1368 if (striptype != RIGHTSTRIP) {
1369 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1370 i++;
1371 }
1372 }
1373
1374 j = len;
1375 if (striptype != LEFTSTRIP) {
1376 do {
1377 j--;
1378 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1379 j++;
1380 }
1381
Tim Peters8fa5dd02001-09-12 02:18:30 +00001382 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383 Py_INCREF(self);
1384 return (PyObject*)self;
1385 }
1386 else
1387 return PyString_FromStringAndSize(s+i, j-i);
1388}
1389
1390
/* Docstring text for S.strip(). */
static char strip__doc__[] =
    "S.strip() -> string\n"
    "\n"
    "Return a copy of the string S with leading and trailing\n"
    "whitespace removed.";
1396
1397static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001398string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001400 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401}
1402
1403
/* Docstring text for S.lstrip(). */
static char lstrip__doc__[] =
    "S.lstrip() -> string\n"
    "\n"
    "Return a copy of the string S with leading whitespace removed.";
1408
1409static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001410string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001412 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413}
1414
1415
/* Docstring text for S.rstrip(). */
static char rstrip__doc__[] =
    "S.rstrip() -> string\n"
    "\n"
    "Return a copy of the string S with trailing whitespace removed.";
1420
1421static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001422string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001424 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425}
1426
1427
/* Docstring text for S.lower(). */
static char lower__doc__[] =
    "S.lower() -> string\n"
    "\n"
    "Return a copy of the string S converted to lowercase.";
1432
1433static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001434string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435{
1436 char *s = PyString_AS_STRING(self), *s_new;
1437 int i, n = PyString_GET_SIZE(self);
1438 PyObject *new;
1439
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440 new = PyString_FromStringAndSize(NULL, n);
1441 if (new == NULL)
1442 return NULL;
1443 s_new = PyString_AsString(new);
1444 for (i = 0; i < n; i++) {
1445 int c = Py_CHARMASK(*s++);
1446 if (isupper(c)) {
1447 *s_new = tolower(c);
1448 } else
1449 *s_new = c;
1450 s_new++;
1451 }
1452 return new;
1453}
1454
1455
/* Docstring text for S.upper(). */
static char upper__doc__[] =
    "S.upper() -> string\n"
    "\n"
    "Return a copy of the string S converted to uppercase.";
1460
1461static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001462string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463{
1464 char *s = PyString_AS_STRING(self), *s_new;
1465 int i, n = PyString_GET_SIZE(self);
1466 PyObject *new;
1467
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468 new = PyString_FromStringAndSize(NULL, n);
1469 if (new == NULL)
1470 return NULL;
1471 s_new = PyString_AsString(new);
1472 for (i = 0; i < n; i++) {
1473 int c = Py_CHARMASK(*s++);
1474 if (islower(c)) {
1475 *s_new = toupper(c);
1476 } else
1477 *s_new = c;
1478 s_new++;
1479 }
1480 return new;
1481}
1482
1483
/* Docstring text for S.title(). */
static char title__doc__[] =
    "S.title() -> string\n"
    "\n"
    "Return a titlecased version of S, i.e. words start with uppercase\n"
    "characters, all remaining cased characters have lowercase.";
1489
1490static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001491string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492{
1493 char *s = PyString_AS_STRING(self), *s_new;
1494 int i, n = PyString_GET_SIZE(self);
1495 int previous_is_cased = 0;
1496 PyObject *new;
1497
Guido van Rossum4c08d552000-03-10 22:55:18 +00001498 new = PyString_FromStringAndSize(NULL, n);
1499 if (new == NULL)
1500 return NULL;
1501 s_new = PyString_AsString(new);
1502 for (i = 0; i < n; i++) {
1503 int c = Py_CHARMASK(*s++);
1504 if (islower(c)) {
1505 if (!previous_is_cased)
1506 c = toupper(c);
1507 previous_is_cased = 1;
1508 } else if (isupper(c)) {
1509 if (previous_is_cased)
1510 c = tolower(c);
1511 previous_is_cased = 1;
1512 } else
1513 previous_is_cased = 0;
1514 *s_new++ = c;
1515 }
1516 return new;
1517}
1518
/* Docstring text for S.capitalize(). */
static char capitalize__doc__[] =
    "S.capitalize() -> string\n"
    "\n"
    "Return a copy of the string S with only its first character\n"
    "capitalized.";
1524
1525static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001526string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527{
1528 char *s = PyString_AS_STRING(self), *s_new;
1529 int i, n = PyString_GET_SIZE(self);
1530 PyObject *new;
1531
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532 new = PyString_FromStringAndSize(NULL, n);
1533 if (new == NULL)
1534 return NULL;
1535 s_new = PyString_AsString(new);
1536 if (0 < n) {
1537 int c = Py_CHARMASK(*s++);
1538 if (islower(c))
1539 *s_new = toupper(c);
1540 else
1541 *s_new = c;
1542 s_new++;
1543 }
1544 for (i = 1; i < n; i++) {
1545 int c = Py_CHARMASK(*s++);
1546 if (isupper(c))
1547 *s_new = tolower(c);
1548 else
1549 *s_new = c;
1550 s_new++;
1551 }
1552 return new;
1553}
1554
1555
/* Docstring text for S.count(). */
static char count__doc__[] =
    "S.count(sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "S[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
1562
1563static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001564string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001566 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567 int len = PyString_GET_SIZE(self), n;
1568 int i = 0, last = INT_MAX;
1569 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001570 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571
Guido van Rossumc6821402000-05-08 14:08:05 +00001572 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1573 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001575
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576 if (PyString_Check(subobj)) {
1577 sub = PyString_AS_STRING(subobj);
1578 n = PyString_GET_SIZE(subobj);
1579 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001580#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001581 else if (PyUnicode_Check(subobj)) {
1582 int count;
1583 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1584 if (count == -1)
1585 return NULL;
1586 else
1587 return PyInt_FromLong((long) count);
1588 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001589#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001590 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1591 return NULL;
1592
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593 if (last > len)
1594 last = len;
1595 if (last < 0)
1596 last += len;
1597 if (last < 0)
1598 last = 0;
1599 if (i < 0)
1600 i += len;
1601 if (i < 0)
1602 i = 0;
1603 m = last + 1 - n;
1604 if (n == 0)
1605 return PyInt_FromLong((long) (m-i));
1606
1607 r = 0;
1608 while (i < m) {
1609 if (!memcmp(s+i, sub, n)) {
1610 r++;
1611 i += n;
1612 } else {
1613 i++;
1614 }
1615 }
1616 return PyInt_FromLong((long) r);
1617}
1618
1619
/* Docstring text for S.swapcase(). */
static char swapcase__doc__[] =
    "S.swapcase() -> string\n"
    "\n"
    "Return a copy of the string S with uppercase characters\n"
    "converted to lowercase and vice versa.";
1625
1626static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001627string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628{
1629 char *s = PyString_AS_STRING(self), *s_new;
1630 int i, n = PyString_GET_SIZE(self);
1631 PyObject *new;
1632
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633 new = PyString_FromStringAndSize(NULL, n);
1634 if (new == NULL)
1635 return NULL;
1636 s_new = PyString_AsString(new);
1637 for (i = 0; i < n; i++) {
1638 int c = Py_CHARMASK(*s++);
1639 if (islower(c)) {
1640 *s_new = toupper(c);
1641 }
1642 else if (isupper(c)) {
1643 *s_new = tolower(c);
1644 }
1645 else
1646 *s_new = c;
1647 s_new++;
1648 }
1649 return new;
1650}
1651
1652
1653static char translate__doc__[] =
1654"S.translate(table [,deletechars]) -> string\n\
1655\n\
1656Return a copy of the string S, where all characters occurring\n\
1657in the optional argument deletechars are removed, and the\n\
1658remaining characters have been mapped through the given\n\
1659translation table, which must be a string of length 256.";
1660
1661static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001662string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001664 register char *input, *output;
1665 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001666 register int i, c, changed = 0;
1667 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669 int inlen, tablen, dellen = 0;
1670 PyObject *result;
1671 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001672 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001673
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674 if (!PyArg_ParseTuple(args, "O|O:translate",
1675 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677
1678 if (PyString_Check(tableobj)) {
1679 table1 = PyString_AS_STRING(tableobj);
1680 tablen = PyString_GET_SIZE(tableobj);
1681 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001682#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001684 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001685 parameter; instead a mapping to None will cause characters
1686 to be deleted. */
1687 if (delobj != NULL) {
1688 PyErr_SetString(PyExc_TypeError,
1689 "deletions are implemented differently for unicode");
1690 return NULL;
1691 }
1692 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1693 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001694#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001697
1698 if (delobj != NULL) {
1699 if (PyString_Check(delobj)) {
1700 del_table = PyString_AS_STRING(delobj);
1701 dellen = PyString_GET_SIZE(delobj);
1702 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001703#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001704 else if (PyUnicode_Check(delobj)) {
1705 PyErr_SetString(PyExc_TypeError,
1706 "deletions are implemented differently for unicode");
1707 return NULL;
1708 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001709#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001710 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1711 return NULL;
1712
1713 if (tablen != 256) {
1714 PyErr_SetString(PyExc_ValueError,
1715 "translation table must be 256 characters long");
1716 return NULL;
1717 }
1718 }
1719 else {
1720 del_table = NULL;
1721 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 }
1723
1724 table = table1;
1725 inlen = PyString_Size(input_obj);
1726 result = PyString_FromStringAndSize((char *)NULL, inlen);
1727 if (result == NULL)
1728 return NULL;
1729 output_start = output = PyString_AsString(result);
1730 input = PyString_AsString(input_obj);
1731
1732 if (dellen == 0) {
1733 /* If no deletions are required, use faster code */
1734 for (i = inlen; --i >= 0; ) {
1735 c = Py_CHARMASK(*input++);
1736 if (Py_CHARMASK((*output++ = table[c])) != c)
1737 changed = 1;
1738 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001739 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 return result;
1741 Py_DECREF(result);
1742 Py_INCREF(input_obj);
1743 return input_obj;
1744 }
1745
1746 for (i = 0; i < 256; i++)
1747 trans_table[i] = Py_CHARMASK(table[i]);
1748
1749 for (i = 0; i < dellen; i++)
1750 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1751
1752 for (i = inlen; --i >= 0; ) {
1753 c = Py_CHARMASK(*input++);
1754 if (trans_table[c] != -1)
1755 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1756 continue;
1757 changed = 1;
1758 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001759 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 Py_DECREF(result);
1761 Py_INCREF(input_obj);
1762 return input_obj;
1763 }
1764 /* Fix the size of the resulting string */
1765 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1766 return NULL;
1767 return result;
1768}
1769
1770
1771/* What follows is used for implementing replace(). Perry Stoll. */
1772
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first position at which the
  PAT_LEN bytes at PAT occur.  Returns that index into MEM, or -1 when
  there is no such position (which is always the case when PAT_LEN is
  greater than LEN).  PAT[0] is read on every probe, so PAT must hold
  at least one byte.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start within the final pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Compare the first byte before paying for the full memcmp. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1798
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  the start and resuming just past the end of each match.
  For example mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
  also gives 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at;

    for (; len >= 0; hits++) {
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* Skip the unmatched prefix and the match itself. */
        mem += at + pat_len;
        len -= at + pat_len;
    }
    return hits;
}
1822
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means replace all of them.

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if no occurrence is to be replaced, the original string is
   returned.  Otherwise, a new buffer is allocated here with
   PyMem_MALLOC and returned; the caller must free it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       the input string itself (when out_len is -1), or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by delta.  When the string
       grows, refuse results that would overflow an int: an overflowed
       new_len would under-allocate the buffer written to below.  When
       it shrinks, nfound matches of pat_len bytes fit inside len, so
       the subtraction cannot go below zero. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound * delta;

    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1907
1908
1909static char replace__doc__[] =
1910"S.replace (old, new[, maxsplit]) -> string\n\
1911\n\
1912Return a copy of string S with all occurrences of substring\n\
1913old replaced by new. If the optional argument maxsplit is\n\
1914given, only the first maxsplit occurrences are replaced.";
1915
1916static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001917string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001919 const char *str = PyString_AS_STRING(self), *sub, *repl;
1920 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001921 const int len = PyString_GET_SIZE(self);
1922 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926
Guido van Rossum4c08d552000-03-10 22:55:18 +00001927 if (!PyArg_ParseTuple(args, "OO|i:replace",
1928 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001930
1931 if (PyString_Check(subobj)) {
1932 sub = PyString_AS_STRING(subobj);
1933 sub_len = PyString_GET_SIZE(subobj);
1934 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001935#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001936 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001937 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001938 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001939#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001940 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1941 return NULL;
1942
1943 if (PyString_Check(replobj)) {
1944 repl = PyString_AS_STRING(replobj);
1945 repl_len = PyString_GET_SIZE(replobj);
1946 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001949 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001950 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001951#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001952 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1953 return NULL;
1954
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001955 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001956 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957 return NULL;
1958 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001959 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960 if (new_s == NULL) {
1961 PyErr_NoMemory();
1962 return NULL;
1963 }
1964 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001965 if (PyString_CheckExact(self)) {
1966 /* we're returning another reference to self */
1967 new = (PyObject*)self;
1968 Py_INCREF(new);
1969 }
1970 else {
1971 new = PyString_FromStringAndSize(str, len);
1972 if (new == NULL)
1973 return NULL;
1974 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975 }
1976 else {
1977 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001978 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979 }
1980 return new;
1981}
1982
1983
1984static char startswith__doc__[] =
1985"S.startswith(prefix[, start[, end]]) -> int\n\
1986\n\
1987Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1988optional start, test S beginning at that position. With optional end, stop\n\
1989comparing S at that position.";
1990
1991static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001992string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001994 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001996 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997 int plen;
1998 int start = 0;
1999 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002000 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001
Guido van Rossumc6821402000-05-08 14:08:05 +00002002 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2003 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002004 return NULL;
2005 if (PyString_Check(subobj)) {
2006 prefix = PyString_AS_STRING(subobj);
2007 plen = PyString_GET_SIZE(subobj);
2008 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002009#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002010 else if (PyUnicode_Check(subobj)) {
2011 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002012 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002013 subobj, start, end, -1);
2014 if (rc == -1)
2015 return NULL;
2016 else
2017 return PyInt_FromLong((long) rc);
2018 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002019#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002020 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 return NULL;
2022
2023 /* adopt Java semantics for index out of range. it is legal for
2024 * offset to be == plen, but this only returns true if prefix is
2025 * the empty string.
2026 */
2027 if (start < 0 || start+plen > len)
2028 return PyInt_FromLong(0);
2029
2030 if (!memcmp(str+start, prefix, plen)) {
2031 /* did the match end after the specified end? */
2032 if (end < 0)
2033 return PyInt_FromLong(1);
2034 else if (end - start < plen)
2035 return PyInt_FromLong(0);
2036 else
2037 return PyInt_FromLong(1);
2038 }
2039 else return PyInt_FromLong(0);
2040}
2041
2042
2043static char endswith__doc__[] =
2044"S.endswith(suffix[, start[, end]]) -> int\n\
2045\n\
2046Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2047optional start, test S beginning at that position. With optional end, stop\n\
2048comparing S at that position.";
2049
2050static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002051string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002053 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002055 const char* suffix;
2056 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057 int start = 0;
2058 int end = -1;
2059 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061
Guido van Rossumc6821402000-05-08 14:08:05 +00002062 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2063 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002064 return NULL;
2065 if (PyString_Check(subobj)) {
2066 suffix = PyString_AS_STRING(subobj);
2067 slen = PyString_GET_SIZE(subobj);
2068 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002069#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002070 else if (PyUnicode_Check(subobj)) {
2071 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002072 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002073 subobj, start, end, +1);
2074 if (rc == -1)
2075 return NULL;
2076 else
2077 return PyInt_FromLong((long) rc);
2078 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002079#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081 return NULL;
2082
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 return PyInt_FromLong(0);
2085
2086 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002087 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090 return PyInt_FromLong(1);
2091 else return PyInt_FromLong(0);
2092}
2093
2094
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002095static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002096"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002097\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002098Encodes S using the codec registered for encoding. encoding defaults\n\
2099to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002100handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2101a ValueError. Other possible values are 'ignore' and 'replace'.";
2102
2103static PyObject *
2104string_encode(PyStringObject *self, PyObject *args)
2105{
2106 char *encoding = NULL;
2107 char *errors = NULL;
2108 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2109 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002110 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2111}
2112
2113
2114static char decode__doc__[] =
2115"S.decode([encoding[,errors]]) -> object\n\
2116\n\
2117Decodes S using the codec registered for encoding. encoding defaults\n\
2118to the default encoding. errors may be given to set a different error\n\
2119handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2120a ValueError. Other possible values are 'ignore' and 'replace'.";
2121
2122static PyObject *
2123string_decode(PyStringObject *self, PyObject *args)
2124{
2125 char *encoding = NULL;
2126 char *errors = NULL;
2127 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2128 return NULL;
2129 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002130}
2131
2132
Guido van Rossum4c08d552000-03-10 22:55:18 +00002133static char expandtabs__doc__[] =
2134"S.expandtabs([tabsize]) -> string\n\
2135\n\
2136Return a copy of S where all tab characters are expanded using spaces.\n\
2137If tabsize is not given, a tab size of 8 characters is assumed.";
2138
2139static PyObject*
2140string_expandtabs(PyStringObject *self, PyObject *args)
2141{
2142 const char *e, *p;
2143 char *q;
2144 int i, j;
2145 PyObject *u;
2146 int tabsize = 8;
2147
2148 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2149 return NULL;
2150
Thomas Wouters7e474022000-07-16 12:04:32 +00002151 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152 i = j = 0;
2153 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2154 for (p = PyString_AS_STRING(self); p < e; p++)
2155 if (*p == '\t') {
2156 if (tabsize > 0)
2157 j += tabsize - (j % tabsize);
2158 }
2159 else {
2160 j++;
2161 if (*p == '\n' || *p == '\r') {
2162 i += j;
2163 j = 0;
2164 }
2165 }
2166
2167 /* Second pass: create output string and fill it */
2168 u = PyString_FromStringAndSize(NULL, i + j);
2169 if (!u)
2170 return NULL;
2171
2172 j = 0;
2173 q = PyString_AS_STRING(u);
2174
2175 for (p = PyString_AS_STRING(self); p < e; p++)
2176 if (*p == '\t') {
2177 if (tabsize > 0) {
2178 i = tabsize - (j % tabsize);
2179 j += i;
2180 while (i--)
2181 *q++ = ' ';
2182 }
2183 }
2184 else {
2185 j++;
2186 *q++ = *p;
2187 if (*p == '\n' || *p == '\r')
2188 j = 0;
2189 }
2190
2191 return u;
2192}
2193
Tim Peters8fa5dd02001-09-12 02:18:30 +00002194static PyObject *
2195pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196{
2197 PyObject *u;
2198
2199 if (left < 0)
2200 left = 0;
2201 if (right < 0)
2202 right = 0;
2203
Tim Peters8fa5dd02001-09-12 02:18:30 +00002204 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 Py_INCREF(self);
2206 return (PyObject *)self;
2207 }
2208
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002209 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210 left + PyString_GET_SIZE(self) + right);
2211 if (u) {
2212 if (left)
2213 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002214 memcpy(PyString_AS_STRING(u) + left,
2215 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216 PyString_GET_SIZE(self));
2217 if (right)
2218 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2219 fill, right);
2220 }
2221
2222 return u;
2223}
2224
2225static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002226"S.ljust(width) -> string\n"
2227"\n"
2228"Return S left justified in a string of length width. Padding is\n"
2229"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002230
2231static PyObject *
2232string_ljust(PyStringObject *self, PyObject *args)
2233{
2234 int width;
2235 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2236 return NULL;
2237
Tim Peters8fa5dd02001-09-12 02:18:30 +00002238 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239 Py_INCREF(self);
2240 return (PyObject*) self;
2241 }
2242
2243 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2244}
2245
2246
2247static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002248"S.rjust(width) -> string\n"
2249"\n"
2250"Return S right justified in a string of length width. Padding is\n"
2251"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002252
2253static PyObject *
2254string_rjust(PyStringObject *self, PyObject *args)
2255{
2256 int width;
2257 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2258 return NULL;
2259
Tim Peters8fa5dd02001-09-12 02:18:30 +00002260 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261 Py_INCREF(self);
2262 return (PyObject*) self;
2263 }
2264
2265 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2266}
2267
2268
2269static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002270"S.center(width) -> string\n"
2271"\n"
2272"Return S centered in a string of length width. Padding is done\n"
2273"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274
2275static PyObject *
2276string_center(PyStringObject *self, PyObject *args)
2277{
2278 int marg, left;
2279 int width;
2280
2281 if (!PyArg_ParseTuple(args, "i:center", &width))
2282 return NULL;
2283
Tim Peters8fa5dd02001-09-12 02:18:30 +00002284 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002285 Py_INCREF(self);
2286 return (PyObject*) self;
2287 }
2288
2289 marg = width - PyString_GET_SIZE(self);
2290 left = marg / 2 + (marg & width & 1);
2291
2292 return pad(self, left, marg - left, ' ');
2293}
2294
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002296"S.isspace() -> int\n"
2297"\n"
2298"Return 1 if there are only whitespace characters in S,\n"
2299"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300
2301static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002302string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303{
Fred Drakeba096332000-07-09 07:04:36 +00002304 register const unsigned char *p
2305 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002306 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 /* Shortcut for single character strings */
2309 if (PyString_GET_SIZE(self) == 1 &&
2310 isspace(*p))
2311 return PyInt_FromLong(1);
2312
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002313 /* Special case for empty strings */
2314 if (PyString_GET_SIZE(self) == 0)
2315 return PyInt_FromLong(0);
2316
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 e = p + PyString_GET_SIZE(self);
2318 for (; p < e; p++) {
2319 if (!isspace(*p))
2320 return PyInt_FromLong(0);
2321 }
2322 return PyInt_FromLong(1);
2323}
2324
2325
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002326static char isalpha__doc__[] =
2327"S.isalpha() -> int\n\
2328\n\
2329Return 1 if all characters in S are alphabetic\n\
2330and there is at least one character in S, 0 otherwise.";
2331
2332static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002333string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002334{
Fred Drakeba096332000-07-09 07:04:36 +00002335 register const unsigned char *p
2336 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002337 register const unsigned char *e;
2338
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002339 /* Shortcut for single character strings */
2340 if (PyString_GET_SIZE(self) == 1 &&
2341 isalpha(*p))
2342 return PyInt_FromLong(1);
2343
2344 /* Special case for empty strings */
2345 if (PyString_GET_SIZE(self) == 0)
2346 return PyInt_FromLong(0);
2347
2348 e = p + PyString_GET_SIZE(self);
2349 for (; p < e; p++) {
2350 if (!isalpha(*p))
2351 return PyInt_FromLong(0);
2352 }
2353 return PyInt_FromLong(1);
2354}
2355
2356
2357static char isalnum__doc__[] =
2358"S.isalnum() -> int\n\
2359\n\
2360Return 1 if all characters in S are alphanumeric\n\
2361and there is at least one character in S, 0 otherwise.";
2362
2363static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002364string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002365{
Fred Drakeba096332000-07-09 07:04:36 +00002366 register const unsigned char *p
2367 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002368 register const unsigned char *e;
2369
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002370 /* Shortcut for single character strings */
2371 if (PyString_GET_SIZE(self) == 1 &&
2372 isalnum(*p))
2373 return PyInt_FromLong(1);
2374
2375 /* Special case for empty strings */
2376 if (PyString_GET_SIZE(self) == 0)
2377 return PyInt_FromLong(0);
2378
2379 e = p + PyString_GET_SIZE(self);
2380 for (; p < e; p++) {
2381 if (!isalnum(*p))
2382 return PyInt_FromLong(0);
2383 }
2384 return PyInt_FromLong(1);
2385}
2386
2387
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388static char isdigit__doc__[] =
2389"S.isdigit() -> int\n\
2390\n\
2391Return 1 if there are only digit characters in S,\n\
23920 otherwise.";
2393
2394static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002395string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396{
Fred Drakeba096332000-07-09 07:04:36 +00002397 register const unsigned char *p
2398 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002399 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401 /* Shortcut for single character strings */
2402 if (PyString_GET_SIZE(self) == 1 &&
2403 isdigit(*p))
2404 return PyInt_FromLong(1);
2405
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002406 /* Special case for empty strings */
2407 if (PyString_GET_SIZE(self) == 0)
2408 return PyInt_FromLong(0);
2409
Guido van Rossum4c08d552000-03-10 22:55:18 +00002410 e = p + PyString_GET_SIZE(self);
2411 for (; p < e; p++) {
2412 if (!isdigit(*p))
2413 return PyInt_FromLong(0);
2414 }
2415 return PyInt_FromLong(1);
2416}
2417
2418
2419static char islower__doc__[] =
2420"S.islower() -> int\n\
2421\n\
2422Return 1 if all cased characters in S are lowercase and there is\n\
2423at least one cased character in S, 0 otherwise.";
2424
2425static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002426string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427{
Fred Drakeba096332000-07-09 07:04:36 +00002428 register const unsigned char *p
2429 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002430 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002431 int cased;
2432
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 /* Shortcut for single character strings */
2434 if (PyString_GET_SIZE(self) == 1)
2435 return PyInt_FromLong(islower(*p) != 0);
2436
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002437 /* Special case for empty strings */
2438 if (PyString_GET_SIZE(self) == 0)
2439 return PyInt_FromLong(0);
2440
Guido van Rossum4c08d552000-03-10 22:55:18 +00002441 e = p + PyString_GET_SIZE(self);
2442 cased = 0;
2443 for (; p < e; p++) {
2444 if (isupper(*p))
2445 return PyInt_FromLong(0);
2446 else if (!cased && islower(*p))
2447 cased = 1;
2448 }
2449 return PyInt_FromLong(cased);
2450}
2451
2452
2453static char isupper__doc__[] =
2454"S.isupper() -> int\n\
2455\n\
2456Return 1 if all cased characters in S are uppercase and there is\n\
2457at least one cased character in S, 0 otherwise.";
2458
2459static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002460string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002461{
Fred Drakeba096332000-07-09 07:04:36 +00002462 register const unsigned char *p
2463 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002464 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 int cased;
2466
Guido van Rossum4c08d552000-03-10 22:55:18 +00002467 /* Shortcut for single character strings */
2468 if (PyString_GET_SIZE(self) == 1)
2469 return PyInt_FromLong(isupper(*p) != 0);
2470
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002471 /* Special case for empty strings */
2472 if (PyString_GET_SIZE(self) == 0)
2473 return PyInt_FromLong(0);
2474
Guido van Rossum4c08d552000-03-10 22:55:18 +00002475 e = p + PyString_GET_SIZE(self);
2476 cased = 0;
2477 for (; p < e; p++) {
2478 if (islower(*p))
2479 return PyInt_FromLong(0);
2480 else if (!cased && isupper(*p))
2481 cased = 1;
2482 }
2483 return PyInt_FromLong(cased);
2484}
2485
2486
2487static char istitle__doc__[] =
2488"S.istitle() -> int\n\
2489\n\
2490Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2491may only follow uncased characters and lowercase characters only cased\n\
2492ones. Return 0 otherwise.";
2493
2494static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002495string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496{
Fred Drakeba096332000-07-09 07:04:36 +00002497 register const unsigned char *p
2498 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002499 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002500 int cased, previous_is_cased;
2501
Guido van Rossum4c08d552000-03-10 22:55:18 +00002502 /* Shortcut for single character strings */
2503 if (PyString_GET_SIZE(self) == 1)
2504 return PyInt_FromLong(isupper(*p) != 0);
2505
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002506 /* Special case for empty strings */
2507 if (PyString_GET_SIZE(self) == 0)
2508 return PyInt_FromLong(0);
2509
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510 e = p + PyString_GET_SIZE(self);
2511 cased = 0;
2512 previous_is_cased = 0;
2513 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002514 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515
2516 if (isupper(ch)) {
2517 if (previous_is_cased)
2518 return PyInt_FromLong(0);
2519 previous_is_cased = 1;
2520 cased = 1;
2521 }
2522 else if (islower(ch)) {
2523 if (!previous_is_cased)
2524 return PyInt_FromLong(0);
2525 previous_is_cased = 1;
2526 cased = 1;
2527 }
2528 else
2529 previous_is_cased = 0;
2530 }
2531 return PyInt_FromLong(cased);
2532}
2533
2534
2535static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002536"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002537\n\
2538Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002539Line breaks are not included in the resulting list unless keepends\n\
2540is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541
2542#define SPLIT_APPEND(data, left, right) \
2543 str = PyString_FromStringAndSize(data + left, right - left); \
2544 if (!str) \
2545 goto onError; \
2546 if (PyList_Append(list, str)) { \
2547 Py_DECREF(str); \
2548 goto onError; \
2549 } \
2550 else \
2551 Py_DECREF(str);
2552
2553static PyObject*
2554string_splitlines(PyStringObject *self, PyObject *args)
2555{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002556 register int i;
2557 register int j;
2558 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002559 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002560 PyObject *list;
2561 PyObject *str;
2562 char *data;
2563
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002564 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565 return NULL;
2566
2567 data = PyString_AS_STRING(self);
2568 len = PyString_GET_SIZE(self);
2569
Guido van Rossum4c08d552000-03-10 22:55:18 +00002570 list = PyList_New(0);
2571 if (!list)
2572 goto onError;
2573
2574 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002575 int eol;
2576
Guido van Rossum4c08d552000-03-10 22:55:18 +00002577 /* Find a line and append it */
2578 while (i < len && data[i] != '\n' && data[i] != '\r')
2579 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580
2581 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002582 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002583 if (i < len) {
2584 if (data[i] == '\r' && i + 1 < len &&
2585 data[i+1] == '\n')
2586 i += 2;
2587 else
2588 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002589 if (keepends)
2590 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002591 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002592 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 j = i;
2594 }
2595 if (j < len) {
2596 SPLIT_APPEND(data, j, len);
2597 }
2598
2599 return list;
2600
2601 onError:
2602 Py_DECREF(list);
2603 return NULL;
2604}
2605
2606#undef SPLIT_APPEND
2607
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002608
/* Method table for the string type; PyString_Type refers to it through its
   tp_methods slot.  Each entry is
       {method name, C implementation cast to PyCFunction,
        calling-convention flag (METH_NOARGS / METH_O / METH_VARARGS),
        docstring}
   and the table ends with a {NULL, NULL} sentinel entry. */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002609static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 /* Counterparts of the obsolete stropmodule functions; except
2612 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002613 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2614 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2615 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2616 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2617 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2618 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2619 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2620 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2621 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2622 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2623 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2624 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2625 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2626 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2627 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2628 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2629 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2630 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2631 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2632 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2633 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2634 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2635 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2636 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2637 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2638 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2639 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2640 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2641 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2642 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2643 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2644 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2645 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
/* The zfill entry is compiled out by the #if 0 below, so the type exposes
   no "zfill" method through this table. */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002647 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002649 {NULL, NULL} /* sentinel */
2650};
2651
Guido van Rossumae960af2001-08-30 03:11:59 +00002652staticforward PyObject *
2653str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2654
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002655static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002656string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002657{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002658 PyObject *x = NULL;
2659 static char *kwlist[] = {"object", 0};
2660
Guido van Rossumae960af2001-08-30 03:11:59 +00002661 if (type != &PyString_Type)
2662 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002663 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2664 return NULL;
2665 if (x == NULL)
2666 return PyString_FromString("");
2667 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002668}
2669
Guido van Rossumae960af2001-08-30 03:11:59 +00002670static PyObject *
2671str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2672{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002673 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002674 int n;
2675
2676 assert(PyType_IsSubtype(type, &PyString_Type));
2677 tmp = string_new(&PyString_Type, args, kwds);
2678 if (tmp == NULL)
2679 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002680 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002681 n = PyString_GET_SIZE(tmp);
2682 pnew = type->tp_alloc(type, n);
2683 if (pnew != NULL) {
2684 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2685#ifdef CACHE_HASH
2686 ((PyStringObject *)pnew)->ob_shash =
2687 ((PyStringObject *)tmp)->ob_shash;
2688#endif
2689#ifdef INTERN_STRINGS
2690 ((PyStringObject *)pnew)->ob_sinterned =
2691 ((PyStringObject *)tmp)->ob_sinterned;
2692#endif
2693 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002694 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002695 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002696}
2697
/* Docstring installed in the tp_doc slot of PyString_Type below. */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002698static char string_doc[] =
2699"str(object) -> string\n\
2700\n\
2701Return a nice string representation of the object.\n\
2702If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002703
/* Type object for strings.  The initializer is positional: every value
   must stay in its slot order.  Py_TPFLAGS_BASETYPE is set, and string_new()
   (the tp_new slot) dispatches to str_subtype_new() when asked to construct
   a subtype.
   NOTE(review): tp_base and tp_alloc are 0 in this static initializer while
   str_subtype_new() calls type->tp_alloc; presumably these slots are filled
   in during type initialization elsewhere -- confirm. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002704PyTypeObject PyString_Type = {
2705 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002706 0, /* ob_size */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002707 "str", /* tp_name */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002708 sizeof(PyStringObject), /* tp_basicsize */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002709 sizeof(char), /* tp_itemsize */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002710 (destructor)string_dealloc, /* tp_dealloc */
2711 (printfunc)string_print, /* tp_print */
2712 0, /* tp_getattr */
2713 0, /* tp_setattr */
2714 0, /* tp_compare */
2715 (reprfunc)string_repr, /* tp_repr */
2716 0, /* tp_as_number */
2717 &string_as_sequence, /* tp_as_sequence */
2718 0, /* tp_as_mapping */
2719 (hashfunc)string_hash, /* tp_hash */
2720 0, /* tp_call */
2721 (reprfunc)string_str, /* tp_str */
2722 PyObject_GenericGetAttr, /* tp_getattro */
2723 0, /* tp_setattro */
2724 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002725 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002726 string_doc, /* tp_doc */
2727 0, /* tp_traverse */
2728 0, /* tp_clear */
2729 (richcmpfunc)string_richcompare, /* tp_richcompare */
2730 0, /* tp_weaklistoffset */
2731 0, /* tp_iter */
2732 0, /* tp_iternext */
2733 string_methods, /* tp_methods */
2734 0, /* tp_members */
2735 0, /* tp_getset */
2736 0, /* tp_base */
2737 0, /* tp_dict */
2738 0, /* tp_descr_get */
2739 0, /* tp_descr_set */
2740 0, /* tp_dictoffset */
2741 0, /* tp_init */
2742 0, /* tp_alloc */
2743 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002744};
2745
2746void
Fred Drakeba096332000-07-09 07:04:36 +00002747PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002748{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002749 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002750 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002751 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002752 if (w == NULL || !PyString_Check(*pv)) {
2753 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002754 *pv = NULL;
2755 return;
2756 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002757 v = string_concat((PyStringObject *) *pv, w);
2758 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002759 *pv = v;
2760}
2761
Guido van Rossum013142a1994-08-30 08:19:36 +00002762void
Fred Drakeba096332000-07-09 07:04:36 +00002763PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002764{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002765 PyString_Concat(pv, w);
2766 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002767}
2768
2769
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002770/* The following function breaks the notion that strings are immutable:
2771 it changes the size of a string. We get away with this only if there
2772 is only one module referencing the object. You can also think of it
2773 as creating a new string object and destroying the old one, only
2774 more efficiently. In any case, don't use this if the string may
2775 already be known to some other part of the code... */
2776
2777int
Fred Drakeba096332000-07-09 07:04:36 +00002778_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002779{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002780 register PyObject *v;
2781 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002782 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002783 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002784 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002785 Py_DECREF(v);
2786 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002787 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002788 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002789 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002790#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002791 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002792#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002793 _Py_ForgetReference(v);
2794 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002795 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002796 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002797 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002798 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002799 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002800 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002801 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002802 _Py_NewReference(*pv);
2803 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002804 sv->ob_size = newsize;
2805 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002806 return 0;
2807}
Guido van Rossume5372401993-03-16 12:15:04 +00002808
2809/* Helpers for formatstring */
2810
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002812getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002813{
2814 int argidx = *p_argidx;
2815 if (argidx < arglen) {
2816 (*p_argidx)++;
2817 if (arglen < 0)
2818 return args;
2819 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002820 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002821 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 PyErr_SetString(PyExc_TypeError,
2823 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002824 return NULL;
2825}
2826
/* Bit flags recording which flag characters were seen in a % conversion
   specification.  They are OR-ed together into a single int `flags` value
   that is passed to the format*() helpers; the table below lists the flag
   character each bit stands for. */
Tim Peters38fd5b62000-09-21 05:43:11 +00002827/* Format codes
2828 * F_LJUST '-'
2829 * F_SIGN '+'
2830 * F_BLANK ' '
2831 * F_ALT '#'
2832 * F_ZERO '0'
2833 */
Guido van Rossume5372401993-03-16 12:15:04 +00002834#define F_LJUST (1<<0)
2835#define F_SIGN (1<<1)
2836#define F_BLANK (1<<2)
2837#define F_ALT (1<<3)
2838#define F_ZERO (1<<4)
2838#define F_ZERO (1<<4)
2839
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002840static int
Fred Drakeba096332000-07-09 07:04:36 +00002841formatfloat(char *buf, size_t buflen, int flags,
2842 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002843{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002844 /* fmt = '%#.' + `prec` + `type`
2845 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002846 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002847 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002848 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002849 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002850 if (prec < 0)
2851 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002852 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2853 type = 'g';
2854 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002855 /* worst case length calc to ensure no buffer overrun:
2856 fmt = %#.<prec>g
2857 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002858 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002859 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2860 If prec=0 the effective precision is 1 (the leading digit is
2861 always given), therefore increase by one to 10+prec. */
2862 if (buflen <= (size_t)10 + (size_t)prec) {
2863 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002864 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002865 return -1;
2866 }
Guido van Rossume5372401993-03-16 12:15:04 +00002867 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002868 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002869}
2870
Tim Peters38fd5b62000-09-21 05:43:11 +00002871/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2872 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2873 * Python's regular ints.
2874 * Return value: a new PyString*, or NULL if error.
2875 * . *pbuf is set to point into it,
2876 * *plen set to the # of chars following that.
2877 * Caller must decref it when done using pbuf.
2878 * The string starting at *pbuf is of the form
2879 * "-"? ("0x" | "0X")? digit+
2880 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002881 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002882 * There will be at least prec digits, zero-filled on the left if
2883 * necessary to get that many.
2884 * val object to be converted
2885 * flags bitmask of format flags; only F_ALT is looked at
2886 * prec minimum number of digits; 0-fill on left if needed
2887 * type a character in [duoxX]; u acts the same as d
2888 *
2889 * CAUTION: o, x and X conversions on regular ints can never
2890 * produce a '-' sign, but can for Python's unbounded ints.
2891 */
2892PyObject*
2893_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2894 char **pbuf, int *plen)
2895{
2896 PyObject *result = NULL;
2897 char *buf;
2898 int i;
2899 int sign; /* 1 if '-', else 0 */
2900 int len; /* number of characters */
2901 int numdigits; /* len == numnondigits + numdigits */
2902 int numnondigits = 0;
2903
2904 switch (type) {
2905 case 'd':
2906 case 'u':
2907 result = val->ob_type->tp_str(val);
2908 break;
2909 case 'o':
2910 result = val->ob_type->tp_as_number->nb_oct(val);
2911 break;
2912 case 'x':
2913 case 'X':
2914 numnondigits = 2;
2915 result = val->ob_type->tp_as_number->nb_hex(val);
2916 break;
2917 default:
2918 assert(!"'type' not in [duoxX]");
2919 }
2920 if (!result)
2921 return NULL;
2922
2923 /* To modify the string in-place, there can only be one reference. */
2924 if (result->ob_refcnt != 1) {
2925 PyErr_BadInternalCall();
2926 return NULL;
2927 }
2928 buf = PyString_AsString(result);
2929 len = PyString_Size(result);
2930 if (buf[len-1] == 'L') {
2931 --len;
2932 buf[len] = '\0';
2933 }
2934 sign = buf[0] == '-';
2935 numnondigits += sign;
2936 numdigits = len - numnondigits;
2937 assert(numdigits > 0);
2938
Tim Petersfff53252001-04-12 18:38:48 +00002939 /* Get rid of base marker unless F_ALT */
2940 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002941 /* Need to skip 0x, 0X or 0. */
2942 int skipped = 0;
2943 switch (type) {
2944 case 'o':
2945 assert(buf[sign] == '0');
2946 /* If 0 is only digit, leave it alone. */
2947 if (numdigits > 1) {
2948 skipped = 1;
2949 --numdigits;
2950 }
2951 break;
2952 case 'x':
2953 case 'X':
2954 assert(buf[sign] == '0');
2955 assert(buf[sign + 1] == 'x');
2956 skipped = 2;
2957 numnondigits -= 2;
2958 break;
2959 }
2960 if (skipped) {
2961 buf += skipped;
2962 len -= skipped;
2963 if (sign)
2964 buf[0] = '-';
2965 }
2966 assert(len == numnondigits + numdigits);
2967 assert(numdigits > 0);
2968 }
2969
2970 /* Fill with leading zeroes to meet minimum width. */
2971 if (prec > numdigits) {
2972 PyObject *r1 = PyString_FromStringAndSize(NULL,
2973 numnondigits + prec);
2974 char *b1;
2975 if (!r1) {
2976 Py_DECREF(result);
2977 return NULL;
2978 }
2979 b1 = PyString_AS_STRING(r1);
2980 for (i = 0; i < numnondigits; ++i)
2981 *b1++ = *buf++;
2982 for (i = 0; i < prec - numdigits; i++)
2983 *b1++ = '0';
2984 for (i = 0; i < numdigits; i++)
2985 *b1++ = *buf++;
2986 *b1 = '\0';
2987 Py_DECREF(result);
2988 result = r1;
2989 buf = PyString_AS_STRING(result);
2990 len = numnondigits + prec;
2991 }
2992
2993 /* Fix up case for hex conversions. */
2994 switch (type) {
2995 case 'x':
2996 /* Need to convert all upper case letters to lower case. */
2997 for (i = 0; i < len; i++)
2998 if (buf[i] >= 'A' && buf[i] <= 'F')
2999 buf[i] += 'a'-'A';
3000 break;
3001 case 'X':
3002 /* Need to convert 0x to 0X (and -0x to -0X). */
3003 if (buf[sign + 1] == 'x')
3004 buf[sign + 1] = 'X';
3005 break;
3006 }
3007 *pbuf = buf;
3008 *plen = len;
3009 return result;
3010}
3011
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003012static int
Fred Drakeba096332000-07-09 07:04:36 +00003013formatint(char *buf, size_t buflen, int flags,
3014 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003015{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003016 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003017 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3018 + 1 + 1 = 24 */
3019 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003020 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003021 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003022 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003023 if (prec < 0)
3024 prec = 1;
3025 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003026 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003027 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003028 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003029 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003030 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003031 return -1;
3032 }
Guido van Rossume5372401993-03-16 12:15:04 +00003033 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003034 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3035 * but we want it (for consistency with other %#x conversions, and
3036 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003037 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3038 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3039 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003040 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003041 if (x == 0 &&
3042 (flags & F_ALT) &&
3043 (type == 'x' || type == 'X') &&
3044 buf[1] != (char)type) /* this last always true under std C */
3045 {
Tim Petersfff53252001-04-12 18:38:48 +00003046 memmove(buf+2, buf, strlen(buf) + 1);
3047 buf[0] = '0';
3048 buf[1] = (char)type;
3049 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003050 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003051}
3052
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003053static int
Fred Drakeba096332000-07-09 07:04:36 +00003054formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003055{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003056 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003057 if (PyString_Check(v)) {
3058 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003059 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003060 }
3061 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003062 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003063 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003064 }
3065 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003066 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003067}
3068
Guido van Rossum013142a1994-08-30 08:19:36 +00003069
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003070/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3071
3072 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3073 chars are formatted. XXX This is a magic number. Each formatting
3074 routine does bounds checking to ensure no overflow, but a better
3075 solution may be to malloc a buffer of appropriate size for each
3076 format. For now, the current solution is sufficient.

 The value is spelled with a size_t cast, the same type as the buflen
 parameter of formatfloat(), formatint() and formatchar().
3077*/
3078#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003079
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003080PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003081PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003082{
3083 char *fmt, *res;
3084 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003085 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003086 PyObject *result, *orig_args;
3087#ifdef Py_USING_UNICODE
3088 PyObject *v, *w;
3089#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003090 PyObject *dict = NULL;
3091 if (format == NULL || !PyString_Check(format) || args == NULL) {
3092 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003093 return NULL;
3094 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003095 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003096 fmt = PyString_AsString(format);
3097 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003098 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003099 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003100 if (result == NULL)
3101 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003102 res = PyString_AsString(result);
3103 if (PyTuple_Check(args)) {
3104 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003105 argidx = 0;
3106 }
3107 else {
3108 arglen = -1;
3109 argidx = -2;
3110 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003111 if (args->ob_type->tp_as_mapping)
3112 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003113 while (--fmtcnt >= 0) {
3114 if (*fmt != '%') {
3115 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003116 rescnt = fmtcnt + 100;
3117 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003118 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003119 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003120 res = PyString_AsString(result)
3121 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003122 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003123 }
3124 *res++ = *fmt++;
3125 }
3126 else {
3127 /* Got a format specifier */
3128 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003129 int width = -1;
3130 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003131 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003132 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003133 PyObject *v = NULL;
3134 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003135 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003136 int sign;
3137 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003138 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003139#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003140 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003141 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003142#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003143
Guido van Rossumda9c2711996-12-05 21:58:58 +00003144 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003145 if (*fmt == '(') {
3146 char *keystart;
3147 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003148 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003149 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003150
3151 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003152 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003153 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003154 goto error;
3155 }
3156 ++fmt;
3157 --fmtcnt;
3158 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003159 /* Skip over balanced parentheses */
3160 while (pcount > 0 && --fmtcnt >= 0) {
3161 if (*fmt == ')')
3162 --pcount;
3163 else if (*fmt == '(')
3164 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003165 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003166 }
3167 keylen = fmt - keystart - 1;
3168 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003169 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003170 "incomplete format key");
3171 goto error;
3172 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003173 key = PyString_FromStringAndSize(keystart,
3174 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003175 if (key == NULL)
3176 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003177 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003178 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003179 args_owned = 0;
3180 }
3181 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003182 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003183 if (args == NULL) {
3184 goto error;
3185 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003186 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003187 arglen = -1;
3188 argidx = -2;
3189 }
Guido van Rossume5372401993-03-16 12:15:04 +00003190 while (--fmtcnt >= 0) {
3191 switch (c = *fmt++) {
3192 case '-': flags |= F_LJUST; continue;
3193 case '+': flags |= F_SIGN; continue;
3194 case ' ': flags |= F_BLANK; continue;
3195 case '#': flags |= F_ALT; continue;
3196 case '0': flags |= F_ZERO; continue;
3197 }
3198 break;
3199 }
3200 if (c == '*') {
3201 v = getnextarg(args, arglen, &argidx);
3202 if (v == NULL)
3203 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003204 if (!PyInt_Check(v)) {
3205 PyErr_SetString(PyExc_TypeError,
3206 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003207 goto error;
3208 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003209 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003210 if (width < 0) {
3211 flags |= F_LJUST;
3212 width = -width;
3213 }
Guido van Rossume5372401993-03-16 12:15:04 +00003214 if (--fmtcnt >= 0)
3215 c = *fmt++;
3216 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003217 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003218 width = c - '0';
3219 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003220 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003221 if (!isdigit(c))
3222 break;
3223 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003224 PyErr_SetString(
3225 PyExc_ValueError,
3226 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003227 goto error;
3228 }
3229 width = width*10 + (c - '0');
3230 }
3231 }
3232 if (c == '.') {
3233 prec = 0;
3234 if (--fmtcnt >= 0)
3235 c = *fmt++;
3236 if (c == '*') {
3237 v = getnextarg(args, arglen, &argidx);
3238 if (v == NULL)
3239 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003240 if (!PyInt_Check(v)) {
3241 PyErr_SetString(
3242 PyExc_TypeError,
3243 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003244 goto error;
3245 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003246 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003247 if (prec < 0)
3248 prec = 0;
3249 if (--fmtcnt >= 0)
3250 c = *fmt++;
3251 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003252 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003253 prec = c - '0';
3254 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003255 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003256 if (!isdigit(c))
3257 break;
3258 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003259 PyErr_SetString(
3260 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003261 "prec too big");
3262 goto error;
3263 }
3264 prec = prec*10 + (c - '0');
3265 }
3266 }
3267 } /* prec */
3268 if (fmtcnt >= 0) {
3269 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003270 if (--fmtcnt >= 0)
3271 c = *fmt++;
3272 }
3273 }
3274 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003275 PyErr_SetString(PyExc_ValueError,
3276 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003277 goto error;
3278 }
3279 if (c != '%') {
3280 v = getnextarg(args, arglen, &argidx);
3281 if (v == NULL)
3282 goto error;
3283 }
3284 sign = 0;
3285 fill = ' ';
3286 switch (c) {
3287 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003288 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003289 len = 1;
3290 break;
3291 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003292 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003293#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003294 if (PyUnicode_Check(v)) {
3295 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003296 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003297 goto unicode;
3298 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003299#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003300 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003301 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003302 else
3303 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003304 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003305 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003306 if (!PyString_Check(temp)) {
3307 PyErr_SetString(PyExc_TypeError,
3308 "%s argument has non-string str()");
3309 goto error;
3310 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003311 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003312 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003313 if (prec >= 0 && len > prec)
3314 len = prec;
3315 break;
3316 case 'i':
3317 case 'd':
3318 case 'u':
3319 case 'o':
3320 case 'x':
3321 case 'X':
3322 if (c == 'i')
3323 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003324 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003325 temp = _PyString_FormatLong(v, flags,
3326 prec, c, &pbuf, &len);
3327 if (!temp)
3328 goto error;
3329 /* unbounded ints can always produce
3330 a sign character! */
3331 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003332 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003333 else {
3334 pbuf = formatbuf;
3335 len = formatint(pbuf, sizeof(formatbuf),
3336 flags, prec, c, v);
3337 if (len < 0)
3338 goto error;
3339 /* only d conversion is signed */
3340 sign = c == 'd';
3341 }
3342 if (flags & F_ZERO)
3343 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003344 break;
3345 case 'e':
3346 case 'E':
3347 case 'f':
3348 case 'g':
3349 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003350 pbuf = formatbuf;
3351 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003352 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003353 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003354 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003355 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003356 fill = '0';
3357 break;
3358 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003359 pbuf = formatbuf;
3360 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003361 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003362 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003363 break;
3364 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003365 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003366 "unsupported format character '%c' (0x%x) "
3367 "at index %i",
3368 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003369 goto error;
3370 }
3371 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003372 if (*pbuf == '-' || *pbuf == '+') {
3373 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003374 len--;
3375 }
3376 else if (flags & F_SIGN)
3377 sign = '+';
3378 else if (flags & F_BLANK)
3379 sign = ' ';
3380 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003381 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003382 }
3383 if (width < len)
3384 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003385 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003386 reslen -= rescnt;
3387 rescnt = width + fmtcnt + 100;
3388 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003389 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003390 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003391 res = PyString_AsString(result)
3392 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003393 }
3394 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003395 if (fill != ' ')
3396 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003397 rescnt--;
3398 if (width > len)
3399 width--;
3400 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003401 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3402 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003403 assert(pbuf[1] == c);
3404 if (fill != ' ') {
3405 *res++ = *pbuf++;
3406 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003407 }
Tim Petersfff53252001-04-12 18:38:48 +00003408 rescnt -= 2;
3409 width -= 2;
3410 if (width < 0)
3411 width = 0;
3412 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003413 }
3414 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003415 do {
3416 --rescnt;
3417 *res++ = fill;
3418 } while (--width > len);
3419 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003420 if (fill == ' ') {
3421 if (sign)
3422 *res++ = sign;
3423 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003424 (c == 'x' || c == 'X')) {
3425 assert(pbuf[0] == '0');
3426 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003427 *res++ = *pbuf++;
3428 *res++ = *pbuf++;
3429 }
3430 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003431 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003432 res += len;
3433 rescnt -= len;
3434 while (--width >= len) {
3435 --rescnt;
3436 *res++ = ' ';
3437 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003438 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003439 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003440 "not all arguments converted");
3441 goto error;
3442 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003443 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003444 } /* '%' */
3445 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003446 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003447 PyErr_SetString(PyExc_TypeError,
3448 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003449 goto error;
3450 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003451 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003452 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003453 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003454 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003455 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003456
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003457#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003458 unicode:
3459 if (args_owned) {
3460 Py_DECREF(args);
3461 args_owned = 0;
3462 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003463 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003464 if (PyTuple_Check(orig_args) && argidx > 0) {
3465 PyObject *v;
3466 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3467 v = PyTuple_New(n);
3468 if (v == NULL)
3469 goto error;
3470 while (--n >= 0) {
3471 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3472 Py_INCREF(w);
3473 PyTuple_SET_ITEM(v, n, w);
3474 }
3475 args = v;
3476 } else {
3477 Py_INCREF(orig_args);
3478 args = orig_args;
3479 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003480 args_owned = 1;
3481 /* Take what we have of the result and let the Unicode formatting
3482 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003483 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003484 if (_PyString_Resize(&result, rescnt))
3485 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003486 fmtcnt = PyString_GET_SIZE(format) - \
3487 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003488 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3489 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003490 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003491 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003492 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003493 if (v == NULL)
3494 goto error;
3495 /* Paste what we have (result) to what the Unicode formatting
3496 function returned (v) and return the result (or error) */
3497 w = PyUnicode_Concat(result, v);
3498 Py_DECREF(result);
3499 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003500 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003501 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003502#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003503
Guido van Rossume5372401993-03-16 12:15:04 +00003504 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003505 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003506 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003507 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003508 }
Guido van Rossume5372401993-03-16 12:15:04 +00003509 return NULL;
3510}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003511
3512
3513#ifdef INTERN_STRINGS
3514
/* PyString_Fini() does not free this dictionary, so it leaks at
 * PyString_Fini() time.  That's acceptable because PyString_Fini()
 * specifically frees interned strings that are only referenced by this
 * dictionary.  The CVS log entry for revision 2.45 says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003527static PyObject *interned;
3528
3529void
Fred Drakeba096332000-07-09 07:04:36 +00003530PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003531{
3532 register PyStringObject *s = (PyStringObject *)(*p);
3533 PyObject *t;
3534 if (s == NULL || !PyString_Check(s))
3535 Py_FatalError("PyString_InternInPlace: strings only please!");
3536 if ((t = s->ob_sinterned) != NULL) {
3537 if (t == (PyObject *)s)
3538 return;
3539 Py_INCREF(t);
3540 *p = t;
3541 Py_DECREF(s);
3542 return;
3543 }
3544 if (interned == NULL) {
3545 interned = PyDict_New();
3546 if (interned == NULL)
3547 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003548 }
3549 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3550 Py_INCREF(t);
3551 *p = s->ob_sinterned = t;
3552 Py_DECREF(s);
3553 return;
3554 }
Tim Peters111f6092001-09-12 07:54:51 +00003555 /* Ensure that only true string objects appear in the intern dict,
3556 and as the value of ob_sinterned. */
3557 if (PyString_CheckExact(s)) {
3558 t = (PyObject *)s;
3559 if (PyDict_SetItem(interned, t, t) == 0) {
3560 s->ob_sinterned = t;
3561 return;
3562 }
3563 }
3564 else {
3565 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3566 PyString_GET_SIZE(s));
3567 if (t != NULL) {
3568 if (PyDict_SetItem(interned, t, t) == 0) {
3569 *p = s->ob_sinterned = t;
3570 Py_DECREF(s);
3571 return;
3572 }
3573 Py_DECREF(t);
3574 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003575 }
3576 PyErr_Clear();
3577}
3578
3579
3580PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003581PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003582{
3583 PyObject *s = PyString_FromString(cp);
3584 if (s == NULL)
3585 return NULL;
3586 PyString_InternInPlace(&s);
3587 return s;
3588}
3589
3590#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003591
3592void
Fred Drakeba096332000-07-09 07:04:36 +00003593PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003594{
3595 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003596 for (i = 0; i < UCHAR_MAX + 1; i++) {
3597 Py_XDECREF(characters[i]);
3598 characters[i] = NULL;
3599 }
3600#ifndef DONT_SHARE_SHORT_STRINGS
3601 Py_XDECREF(nullstring);
3602 nullstring = NULL;
3603#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003604#ifdef INTERN_STRINGS
3605 if (interned) {
3606 int pos, changed;
3607 PyObject *key, *value;
3608 do {
3609 changed = 0;
3610 pos = 0;
3611 while (PyDict_Next(interned, &pos, &key, &value)) {
3612 if (key->ob_refcnt == 2 && key == value) {
3613 PyDict_DelItem(interned, key);
3614 changed = 1;
3615 }
3616 }
3617 } while (changed);
3618 }
3619#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003620}
Barry Warsawa903ad982001-02-23 16:40:48 +00003621
3622#ifdef INTERN_STRINGS
3623void _Py_ReleaseInternedStrings(void)
3624{
3625 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003626 fprintf(stderr, "releasing interned strings\n");
3627 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003628 Py_DECREF(interned);
3629 interned = NULL;
3630 }
3631}
3632#endif /* INTERN_STRINGS */