blob: 9ea32a203d917b05fe5933c327068abc772db8d8 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   Newsizedstringobject() and newstringobject() (now spelled
   PyString_FromStringAndSize() and PyString_FromString()) try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   newstringobject() this is always the case; for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
164 ;
165
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
172
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
195 */
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
207 }
208 } else
209 n++;
210 }
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
216
217 s = PyString_AsString(string);
218
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 }
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
241 }
242
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000272 /* %p is ill-defined: ensure leading 0x. */
273 if (s[1] == 'X')
274 s[1] = 'x';
275 else if (s[1] != 'x') {
276 memmove(s+2, s, strlen(s)+1);
277 s[0] = '0';
278 s[1] = 'x';
279 }
Barry Warsawdadace02001-08-24 18:32:06 +0000280 s += strlen(s);
281 break;
282 case '%':
283 *s++ = '%';
284 break;
285 default:
286 strcpy(s, p);
287 s += strlen(s);
288 goto end;
289 }
290 } else
291 *s++ = *f;
292 }
293
294 end:
295 _PyString_Resize(&string, s - PyString_AsString(string));
296 return string;
297}
298
299PyObject *
300PyString_FromFormat(const char *format, ...)
301{
302 va_list vargs;
303
304#ifdef HAVE_STDARG_PROTOTYPES
305 va_start(vargs, format);
306#else
307 va_start(vargs);
308#endif
309 return PyString_FromFormatV(format, vargs);
310}
311
312
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000313PyObject *PyString_Decode(const char *s,
314 int size,
315 const char *encoding,
316 const char *errors)
317{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000318 PyObject *v, *str;
319
320 str = PyString_FromStringAndSize(s, size);
321 if (str == NULL)
322 return NULL;
323 v = PyString_AsDecodedString(str, encoding, errors);
324 Py_DECREF(str);
325 return v;
326}
327
328PyObject *PyString_AsDecodedObject(PyObject *str,
329 const char *encoding,
330 const char *errors)
331{
332 PyObject *v;
333
334 if (!PyString_Check(str)) {
335 PyErr_BadArgument();
336 goto onError;
337 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000338
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000339 if (encoding == NULL) {
340#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000341 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000342#else
343 PyErr_SetString(PyExc_ValueError, "no encoding specified");
344 goto onError;
345#endif
346 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000347
348 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000349 v = PyCodec_Decode(str, encoding, errors);
350 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000351 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000352
353 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000356 return NULL;
357}
358
359PyObject *PyString_AsDecodedString(PyObject *str,
360 const char *encoding,
361 const char *errors)
362{
363 PyObject *v;
364
365 v = PyString_AsDecodedObject(str, encoding, errors);
366 if (v == NULL)
367 goto onError;
368
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000369#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000370 /* Convert Unicode to a string using the default encoding */
371 if (PyUnicode_Check(v)) {
372 PyObject *temp = v;
373 v = PyUnicode_AsEncodedString(v, NULL, NULL);
374 Py_DECREF(temp);
375 if (v == NULL)
376 goto onError;
377 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 if (!PyString_Check(v)) {
380 PyErr_Format(PyExc_TypeError,
381 "decoder did not return a string object (type=%.400s)",
382 v->ob_type->tp_name);
383 Py_DECREF(v);
384 goto onError;
385 }
386
387 return v;
388
389 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000390 return NULL;
391}
392
393PyObject *PyString_Encode(const char *s,
394 int size,
395 const char *encoding,
396 const char *errors)
397{
398 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000399
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000400 str = PyString_FromStringAndSize(s, size);
401 if (str == NULL)
402 return NULL;
403 v = PyString_AsEncodedString(str, encoding, errors);
404 Py_DECREF(str);
405 return v;
406}
407
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000408PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 const char *encoding,
410 const char *errors)
411{
412 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000413
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 if (!PyString_Check(str)) {
415 PyErr_BadArgument();
416 goto onError;
417 }
418
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000419 if (encoding == NULL) {
420#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000421 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422#else
423 PyErr_SetString(PyExc_ValueError, "no encoding specified");
424 goto onError;
425#endif
426 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427
428 /* Encode via the codec registry */
429 v = PyCodec_Encode(str, encoding, errors);
430 if (v == NULL)
431 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432
433 return v;
434
435 onError:
436 return NULL;
437}
438
439PyObject *PyString_AsEncodedString(PyObject *str,
440 const char *encoding,
441 const char *errors)
442{
443 PyObject *v;
444
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000445 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000446 if (v == NULL)
447 goto onError;
448
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000449#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000450 /* Convert Unicode to a string using the default encoding */
451 if (PyUnicode_Check(v)) {
452 PyObject *temp = v;
453 v = PyUnicode_AsEncodedString(v, NULL, NULL);
454 Py_DECREF(temp);
455 if (v == NULL)
456 goto onError;
457 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 if (!PyString_Check(v)) {
460 PyErr_Format(PyExc_TypeError,
461 "encoder did not return a string object (type=%.400s)",
462 v->ob_type->tp_name);
463 Py_DECREF(v);
464 goto onError;
465 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000466
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000467 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000468
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 onError:
470 return NULL;
471}
472
Guido van Rossum234f9421993-06-17 12:35:49 +0000473static void
Fred Drakeba096332000-07-09 07:04:36 +0000474string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000475{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000476 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000477}
478
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000479static int
480string_getsize(register PyObject *op)
481{
482 char *s;
483 int len;
484 if (PyString_AsStringAndSize(op, &s, &len))
485 return -1;
486 return len;
487}
488
489static /*const*/ char *
490string_getbuffer(register PyObject *op)
491{
492 char *s;
493 int len;
494 if (PyString_AsStringAndSize(op, &s, &len))
495 return NULL;
496 return s;
497}
498
Guido van Rossumd7047b31995-01-02 19:07:15 +0000499int
Fred Drakeba096332000-07-09 07:04:36 +0000500PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000501{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (!PyString_Check(op))
503 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000504 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000505}
506
507/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000508PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000510 if (!PyString_Check(op))
511 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000512 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513}
514
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000515int
516PyString_AsStringAndSize(register PyObject *obj,
517 register char **s,
518 register int *len)
519{
520 if (s == NULL) {
521 PyErr_BadInternalCall();
522 return -1;
523 }
524
525 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000526#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000527 if (PyUnicode_Check(obj)) {
528 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
529 if (obj == NULL)
530 return -1;
531 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000532 else
533#endif
534 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000535 PyErr_Format(PyExc_TypeError,
536 "expected string or Unicode object, "
537 "%.200s found", obj->ob_type->tp_name);
538 return -1;
539 }
540 }
541
542 *s = PyString_AS_STRING(obj);
543 if (len != NULL)
544 *len = PyString_GET_SIZE(obj);
545 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
546 PyErr_SetString(PyExc_TypeError,
547 "expected string without null bytes");
548 return -1;
549 }
550 return 0;
551}
552
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000553/* Methods */
554
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000555static int
Fred Drakeba096332000-07-09 07:04:36 +0000556string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000557{
558 int i;
559 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000560 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000561 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000562 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000563 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000565 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000566
Thomas Wouters7e474022000-07-16 12:04:32 +0000567 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000568 quote = '\'';
569 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
570 quote = '"';
571
572 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573 for (i = 0; i < op->ob_size; i++) {
574 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000575 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000577 else if (c == '\t')
578 fprintf(fp, "\\t");
579 else if (c == '\n')
580 fprintf(fp, "\\n");
581 else if (c == '\r')
582 fprintf(fp, "\\r");
583 else if (c < ' ' || c >= 0x7f)
584 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000587 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000589 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590}
591
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000592static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000593string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000595 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
596 PyObject *v;
597 if (newsize > INT_MAX) {
598 PyErr_SetString(PyExc_OverflowError,
599 "string is too large to make repr");
600 }
601 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000602 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000603 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000604 }
605 else {
606 register int i;
607 register char c;
608 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609 int quote;
610
Thomas Wouters7e474022000-07-16 12:04:32 +0000611 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000612 quote = '\'';
613 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
614 quote = '"';
615
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000616 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000617 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000618 for (i = 0; i < op->ob_size; i++) {
619 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000622 else if (c == '\t')
623 *p++ = '\\', *p++ = 't';
624 else if (c == '\n')
625 *p++ = '\\', *p++ = 'n';
626 else if (c == '\r')
627 *p++ = '\\', *p++ = 'r';
628 else if (c < ' ' || c >= 0x7f) {
629 sprintf(p, "\\x%02x", c & 0xff);
630 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631 }
632 else
633 *p++ = c;
634 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000635 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000636 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000637 _PyString_Resize(
638 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000639 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000641}
642
Guido van Rossum189f1df2001-05-01 16:51:53 +0000643static PyObject *
644string_str(PyObject *s)
645{
646 Py_INCREF(s);
647 return s;
648}
649
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650static int
Fred Drakeba096332000-07-09 07:04:36 +0000651string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000652{
653 return a->ob_size;
654}
655
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000656static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000657string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000658{
659 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000660 register PyStringObject *op;
661 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000662#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000663 if (PyUnicode_Check(bb))
664 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000665#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000666 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000667 "cannot add type \"%.200s\" to string",
668 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 return NULL;
670 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000671#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672 /* Optimize cases with empty left or right operand */
673 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675 return bb;
676 }
677 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000678 Py_INCREF(a);
679 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000680 }
681 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000682 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000683 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000684 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000685 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000686 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000687 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000688#ifdef CACHE_HASH
689 op->ob_shash = -1;
690#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000691#ifdef INTERN_STRINGS
692 op->ob_sinterned = NULL;
693#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000694 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
695 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
696 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698#undef b
699}
700
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000702string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
704 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000705 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000707 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000708 if (n < 0)
709 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000710 /* watch out for overflows: the size can overflow int,
711 * and the # of bytes needed can overflow size_t
712 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000713 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000714 if (n && size / n != a->ob_size) {
715 PyErr_SetString(PyExc_OverflowError,
716 "repeated string is too long");
717 return NULL;
718 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 Py_INCREF(a);
721 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722 }
Tim Peters8f422462000-09-09 06:13:41 +0000723 nbytes = size * sizeof(char);
724 if (nbytes / sizeof(char) != (size_t)size ||
725 nbytes + sizeof(PyStringObject) <= nbytes) {
726 PyErr_SetString(PyExc_OverflowError,
727 "repeated string is too long");
728 return NULL;
729 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000731 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000732 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000734 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000735#ifdef CACHE_HASH
736 op->ob_shash = -1;
737#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000738#ifdef INTERN_STRINGS
739 op->ob_sinterned = NULL;
740#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000741 for (i = 0; i < size; i += a->ob_size)
742 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
743 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000744 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745}
746
747/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
748
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000749static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000750string_slice(register PyStringObject *a, register int i, register int j)
751 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000752{
753 if (i < 0)
754 i = 0;
755 if (j < 0)
756 j = 0; /* Avoid signed/unsigned bug in next line */
757 if (j > a->ob_size)
758 j = a->ob_size;
759 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000760 Py_INCREF(a);
761 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000762 }
763 if (j < i)
764 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000765 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000766}
767
Guido van Rossum9284a572000-03-07 15:53:43 +0000768static int
Fred Drakeba096332000-07-09 07:04:36 +0000769string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000770{
771 register char *s, *end;
772 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000773#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000774 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000775 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000776#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000777 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000778 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000779 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000780 return -1;
781 }
782 c = PyString_AsString(el)[0];
783 s = PyString_AsString(a);
784 end = s + PyString_Size(a);
785 while (s < end) {
786 if (c == *s++)
787 return 1;
788 }
789 return 0;
790}
791
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000792static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000793string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000795 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000796 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000798 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799 return NULL;
800 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000801 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000802 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000803 if (v == NULL)
804 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000805 else {
806#ifdef COUNT_ALLOCS
807 one_strings++;
808#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000809 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000810 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000811 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000812}
813
Martin v. Löwiscd353062001-05-24 16:56:35 +0000814static PyObject*
815string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000816{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000817 int c;
818 int len_a, len_b;
819 int min_len;
820 PyObject *result;
821
822 /* One of the objects is a string object. Make sure the
823 other one is one, too. */
824 if (a->ob_type != b->ob_type) {
825 result = Py_NotImplemented;
826 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000827 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000828 if (a == b) {
829 switch (op) {
830 case Py_EQ:case Py_LE:case Py_GE:
831 result = Py_True;
832 goto out;
833 case Py_NE:case Py_LT:case Py_GT:
834 result = Py_False;
835 goto out;
836 }
837 }
838 if (op == Py_EQ) {
839 /* Supporting Py_NE here as well does not save
840 much time, since Py_NE is rarely used. */
841 if (a->ob_size == b->ob_size
842 && (a->ob_sval[0] == b->ob_sval[0]
843 && memcmp(a->ob_sval, b->ob_sval,
844 a->ob_size) == 0)) {
845 result = Py_True;
846 } else {
847 result = Py_False;
848 }
849 goto out;
850 }
851 len_a = a->ob_size; len_b = b->ob_size;
852 min_len = (len_a < len_b) ? len_a : len_b;
853 if (min_len > 0) {
854 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
855 if (c==0)
856 c = memcmp(a->ob_sval, b->ob_sval, min_len);
857 }else
858 c = 0;
859 if (c == 0)
860 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
861 switch (op) {
862 case Py_LT: c = c < 0; break;
863 case Py_LE: c = c <= 0; break;
864 case Py_EQ: assert(0); break; /* unreachable */
865 case Py_NE: c = c != 0; break;
866 case Py_GT: c = c > 0; break;
867 case Py_GE: c = c >= 0; break;
868 default:
869 result = Py_NotImplemented;
870 goto out;
871 }
872 result = c ? Py_True : Py_False;
873 out:
874 Py_INCREF(result);
875 return result;
876}
877
878int
879_PyString_Eq(PyObject *o1, PyObject *o2)
880{
881 PyStringObject *a, *b;
882 a = (PyStringObject*)o1;
883 b = (PyStringObject*)o2;
884 return a->ob_size == b->ob_size
885 && *a->ob_sval == *b->ob_sval
886 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887}
888
Guido van Rossum9bfef441993-03-29 10:43:31 +0000889static long
Fred Drakeba096332000-07-09 07:04:36 +0000890string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000891{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000892 register int len;
893 register unsigned char *p;
894 register long x;
895
896#ifdef CACHE_HASH
897 if (a->ob_shash != -1)
898 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000899#ifdef INTERN_STRINGS
900 if (a->ob_sinterned != NULL)
901 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000902 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000903#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000904#endif
905 len = a->ob_size;
906 p = (unsigned char *) a->ob_sval;
907 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000908 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000909 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000910 x ^= a->ob_size;
911 if (x == -1)
912 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000913#ifdef CACHE_HASH
914 a->ob_shash = x;
915#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000916 return x;
917}
918
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000919static int
Fred Drakeba096332000-07-09 07:04:36 +0000920string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000921{
922 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000923 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000924 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000925 return -1;
926 }
927 *ptr = (void *)self->ob_sval;
928 return self->ob_size;
929}
930
/* Write-buffer slot: always fails.
 *
 * Sets TypeError and returns -1 unconditionally; self, index and ptr are
 * not used.
 */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
938
939static int
Fred Drakeba096332000-07-09 07:04:36 +0000940string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000941{
942 if ( lenp )
943 *lenp = self->ob_size;
944 return 1;
945}
946
Guido van Rossum1db70701998-10-08 02:18:52 +0000947static int
Fred Drakeba096332000-07-09 07:04:36 +0000948string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000949{
950 if ( index != 0 ) {
951 PyErr_SetString(PyExc_SystemError,
952 "accessing non-existent string segment");
953 return -1;
954 }
955 *ptr = self->ob_sval;
956 return self->ob_size;
957}
958
/* Sequence-protocol slot table for strings.  The two assignment slots are
   0: no item or slice assignment function is provided. */
static PySequenceMethods string_as_sequence = {
    (inquiry)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (intargfunc)string_repeat, /*sq_repeat*/
    (intargfunc)string_item, /*sq_item*/
    (intintargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
969
/* Buffer-protocol slot table for strings: read, write (always fails),
   segment count, and character buffer, in that order. */
static PyBufferProcs string_as_buffer = {
    (getreadbufferproc)string_buffer_getreadbuf,
    (getwritebufferproc)string_buffer_getwritebuf,
    (getsegcountproc)string_buffer_getsegcount,
    (getcharbufferproc)string_buffer_getcharbuf,
};
976
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000977
978
/* Mode selectors passed to do_strip(): which end(s) of the string to trim. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
982
983
984static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000985split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000986{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000987 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000988 PyObject* item;
989 PyObject *list = PyList_New(0);
990
991 if (list == NULL)
992 return NULL;
993
Guido van Rossum4c08d552000-03-10 22:55:18 +0000994 for (i = j = 0; i < len; ) {
995 while (i < len && isspace(Py_CHARMASK(s[i])))
996 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000997 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000998 while (i < len && !isspace(Py_CHARMASK(s[i])))
999 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001000 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001001 if (maxsplit-- <= 0)
1002 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001003 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1004 if (item == NULL)
1005 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 err = PyList_Append(list, item);
1007 Py_DECREF(item);
1008 if (err < 0)
1009 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001010 while (i < len && isspace(Py_CHARMASK(s[i])))
1011 i++;
1012 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001013 }
1014 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001015 if (j < len) {
1016 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1017 if (item == NULL)
1018 goto finally;
1019 err = PyList_Append(list, item);
1020 Py_DECREF(item);
1021 if (err < 0)
1022 goto finally;
1023 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001024 return list;
1025 finally:
1026 Py_DECREF(list);
1027 return NULL;
1028}
1029
1030
1031static char split__doc__[] =
1032"S.split([sep [,maxsplit]]) -> list of strings\n\
1033\n\
1034Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001035delimiter string. If maxsplit is given, at most maxsplit\n\
1036splits are done. If sep is not specified, any whitespace string\n\
1037is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001038
1039static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001040string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041{
1042 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001043 int maxsplit = -1;
1044 const char *s = PyString_AS_STRING(self), *sub;
1045 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001046
Guido van Rossum4c08d552000-03-10 22:55:18 +00001047 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001048 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001049 if (maxsplit < 0)
1050 maxsplit = INT_MAX;
1051 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 if (PyString_Check(subobj)) {
1054 sub = PyString_AS_STRING(subobj);
1055 n = PyString_GET_SIZE(subobj);
1056 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001057#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001058 else if (PyUnicode_Check(subobj))
1059 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001061 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1062 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001063 if (n == 0) {
1064 PyErr_SetString(PyExc_ValueError, "empty separator");
1065 return NULL;
1066 }
1067
1068 list = PyList_New(0);
1069 if (list == NULL)
1070 return NULL;
1071
1072 i = j = 0;
1073 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001074 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001075 if (maxsplit-- <= 0)
1076 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1078 if (item == NULL)
1079 goto fail;
1080 err = PyList_Append(list, item);
1081 Py_DECREF(item);
1082 if (err < 0)
1083 goto fail;
1084 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001085 }
1086 else
1087 i++;
1088 }
1089 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1090 if (item == NULL)
1091 goto fail;
1092 err = PyList_Append(list, item);
1093 Py_DECREF(item);
1094 if (err < 0)
1095 goto fail;
1096
1097 return list;
1098
1099 fail:
1100 Py_DECREF(list);
1101 return NULL;
1102}
1103
1104
1105static char join__doc__[] =
1106"S.join(sequence) -> string\n\
1107\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001108Return a string which is the concatenation of the strings in the\n\
1109sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001110
1111static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001112string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113{
1114 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001115 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001117 char *p;
1118 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001119 size_t sz = 0;
1120 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001121 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001122
Tim Peters19fe14e2001-01-19 03:03:47 +00001123 seq = PySequence_Fast(orig, "");
1124 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001125 if (PyErr_ExceptionMatches(PyExc_TypeError))
1126 PyErr_Format(PyExc_TypeError,
1127 "sequence expected, %.80s found",
1128 orig->ob_type->tp_name);
1129 return NULL;
1130 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001131
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001132 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001133 if (seqlen == 0) {
1134 Py_DECREF(seq);
1135 return PyString_FromString("");
1136 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001137 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001138 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001139 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1140 PyErr_Format(PyExc_TypeError,
1141 "sequence item 0: expected string,"
1142 " %.80s found",
1143 item->ob_type->tp_name);
1144 Py_DECREF(seq);
1145 return NULL;
1146 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001147 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001148 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001149 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001150 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001151
Tim Peters19fe14e2001-01-19 03:03:47 +00001152 /* There are at least two things to join. Do a pre-pass to figure out
1153 * the total amount of space we'll need (sz), see whether any argument
1154 * is absurd, and defer to the Unicode join if appropriate.
1155 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001156 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001157 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001158 item = PySequence_Fast_GET_ITEM(seq, i);
1159 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001160#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001161 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001162 /* Defer to Unicode join.
1163 * CAUTION: There's no gurantee that the
1164 * original sequence can be iterated over
1165 * again, so we must pass seq here.
1166 */
1167 PyObject *result;
1168 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001169 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001170 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001171 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001172#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001173 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001174 "sequence item %i: expected string,"
1175 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001176 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001177 Py_DECREF(seq);
1178 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001179 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001180 sz += PyString_GET_SIZE(item);
1181 if (i != 0)
1182 sz += seplen;
1183 if (sz < old_sz || sz > INT_MAX) {
1184 PyErr_SetString(PyExc_OverflowError,
1185 "join() is too long for a Python string");
1186 Py_DECREF(seq);
1187 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001188 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001189 }
1190
1191 /* Allocate result space. */
1192 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1193 if (res == NULL) {
1194 Py_DECREF(seq);
1195 return NULL;
1196 }
1197
1198 /* Catenate everything. */
1199 p = PyString_AS_STRING(res);
1200 for (i = 0; i < seqlen; ++i) {
1201 size_t n;
1202 item = PySequence_Fast_GET_ITEM(seq, i);
1203 n = PyString_GET_SIZE(item);
1204 memcpy(p, PyString_AS_STRING(item), n);
1205 p += n;
1206 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001207 memcpy(p, sep, seplen);
1208 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001209 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001210 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001211
Jeremy Hylton49048292000-07-11 03:28:17 +00001212 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001214}
1215
/* Externally visible wrapper around string_join().
 *
 * sep must be a non-NULL string object and x must be non-NULL; both
 * conditions are checked only by assert().  Returns whatever
 * string_join() returns.
 */
PyObject *
_PyString_Join(PyObject *sep, PyObject *x)
{
    assert(sep != NULL && PyString_Check(sep));
    assert(x != NULL);
    return string_join((PyStringObject *)sep, x);
}
1223
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001224static long
Fred Drakeba096332000-07-09 07:04:36 +00001225string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001227 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228 int len = PyString_GET_SIZE(self);
1229 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001230 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001232 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001233 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001234 return -2;
1235 if (PyString_Check(subobj)) {
1236 sub = PyString_AS_STRING(subobj);
1237 n = PyString_GET_SIZE(subobj);
1238 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001239#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001240 else if (PyUnicode_Check(subobj))
1241 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001242#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001243 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001244 return -2;
1245
1246 if (last > len)
1247 last = len;
1248 if (last < 0)
1249 last += len;
1250 if (last < 0)
1251 last = 0;
1252 if (i < 0)
1253 i += len;
1254 if (i < 0)
1255 i = 0;
1256
Guido van Rossum4c08d552000-03-10 22:55:18 +00001257 if (dir > 0) {
1258 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001259 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001260 last -= n;
1261 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001262 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 return (long)i;
1264 }
1265 else {
1266 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001267
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268 if (n == 0 && i <= last)
1269 return (long)last;
1270 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001271 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 return (long)j;
1273 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001274
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001275 return -1;
1276}
1277
1278
1279static char find__doc__[] =
1280"S.find(sub [,start [,end]]) -> int\n\
1281\n\
1282Return the lowest index in S where substring sub is found,\n\
1283such that sub is contained within s[start,end]. Optional\n\
1284arguments start and end are interpreted as in slice notation.\n\
1285\n\
1286Return -1 on failure.";
1287
1288static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001289string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001291 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292 if (result == -2)
1293 return NULL;
1294 return PyInt_FromLong(result);
1295}
1296
1297
1298static char index__doc__[] =
1299"S.index(sub [,start [,end]]) -> int\n\
1300\n\
1301Like S.find() but raise ValueError when the substring is not found.";
1302
1303static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001304string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001306 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307 if (result == -2)
1308 return NULL;
1309 if (result == -1) {
1310 PyErr_SetString(PyExc_ValueError,
1311 "substring not found in string.index");
1312 return NULL;
1313 }
1314 return PyInt_FromLong(result);
1315}
1316
1317
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318static char rfind__doc__[] =
1319"S.rfind(sub [,start [,end]]) -> int\n\
1320\n\
1321Return the highest index in S where substring sub is found,\n\
1322such that sub is contained within s[start,end]. Optional\n\
1323arguments start and end are interpreted as in slice notation.\n\
1324\n\
1325Return -1 on failure.";
1326
1327static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001328string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001329{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331 if (result == -2)
1332 return NULL;
1333 return PyInt_FromLong(result);
1334}
1335
1336
1337static char rindex__doc__[] =
1338"S.rindex(sub [,start [,end]]) -> int\n\
1339\n\
1340Like S.rfind() but raise ValueError when the substring is not found.";
1341
1342static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001343string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346 if (result == -2)
1347 return NULL;
1348 if (result == -1) {
1349 PyErr_SetString(PyExc_ValueError,
1350 "substring not found in string.rindex");
1351 return NULL;
1352 }
1353 return PyInt_FromLong(result);
1354}
1355
1356
1357static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001358do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001359{
1360 char *s = PyString_AS_STRING(self);
1361 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363 i = 0;
1364 if (striptype != RIGHTSTRIP) {
1365 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1366 i++;
1367 }
1368 }
1369
1370 j = len;
1371 if (striptype != LEFTSTRIP) {
1372 do {
1373 j--;
1374 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1375 j++;
1376 }
1377
1378 if (i == 0 && j == len) {
1379 Py_INCREF(self);
1380 return (PyObject*)self;
1381 }
1382 else
1383 return PyString_FromStringAndSize(s+i, j-i);
1384}
1385
1386
static char strip__doc__[] =
"S.strip() -> string\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.";

/* S.strip(): delegates to do_strip() with BOTHSTRIP. */
static PyObject *
string_strip(PyStringObject *self)
{
    return do_strip(self, BOTHSTRIP);
}
1398
1399
static char lstrip__doc__[] =
"S.lstrip() -> string\n\
\n\
Return a copy of the string S with leading whitespace removed.";

/* S.lstrip(): delegates to do_strip() with LEFTSTRIP. */
static PyObject *
string_lstrip(PyStringObject *self)
{
    return do_strip(self, LEFTSTRIP);
}
1410
1411
static char rstrip__doc__[] =
"S.rstrip() -> string\n\
\n\
Return a copy of the string S with trailing whitespace removed.";

/* S.rstrip(): delegates to do_strip() with RIGHTSTRIP. */
static PyObject *
string_rstrip(PyStringObject *self)
{
    return do_strip(self, RIGHTSTRIP);
}
1422
1423
1424static char lower__doc__[] =
1425"S.lower() -> string\n\
1426\n\
1427Return a copy of the string S converted to lowercase.";
1428
1429static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001430string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431{
1432 char *s = PyString_AS_STRING(self), *s_new;
1433 int i, n = PyString_GET_SIZE(self);
1434 PyObject *new;
1435
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436 new = PyString_FromStringAndSize(NULL, n);
1437 if (new == NULL)
1438 return NULL;
1439 s_new = PyString_AsString(new);
1440 for (i = 0; i < n; i++) {
1441 int c = Py_CHARMASK(*s++);
1442 if (isupper(c)) {
1443 *s_new = tolower(c);
1444 } else
1445 *s_new = c;
1446 s_new++;
1447 }
1448 return new;
1449}
1450
1451
1452static char upper__doc__[] =
1453"S.upper() -> string\n\
1454\n\
1455Return a copy of the string S converted to uppercase.";
1456
1457static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001458string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459{
1460 char *s = PyString_AS_STRING(self), *s_new;
1461 int i, n = PyString_GET_SIZE(self);
1462 PyObject *new;
1463
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001464 new = PyString_FromStringAndSize(NULL, n);
1465 if (new == NULL)
1466 return NULL;
1467 s_new = PyString_AsString(new);
1468 for (i = 0; i < n; i++) {
1469 int c = Py_CHARMASK(*s++);
1470 if (islower(c)) {
1471 *s_new = toupper(c);
1472 } else
1473 *s_new = c;
1474 s_new++;
1475 }
1476 return new;
1477}
1478
1479
Guido van Rossum4c08d552000-03-10 22:55:18 +00001480static char title__doc__[] =
1481"S.title() -> string\n\
1482\n\
1483Return a titlecased version of S, i.e. words start with uppercase\n\
1484characters, all remaining cased characters have lowercase.";
1485
1486static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001487string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001488{
1489 char *s = PyString_AS_STRING(self), *s_new;
1490 int i, n = PyString_GET_SIZE(self);
1491 int previous_is_cased = 0;
1492 PyObject *new;
1493
Guido van Rossum4c08d552000-03-10 22:55:18 +00001494 new = PyString_FromStringAndSize(NULL, n);
1495 if (new == NULL)
1496 return NULL;
1497 s_new = PyString_AsString(new);
1498 for (i = 0; i < n; i++) {
1499 int c = Py_CHARMASK(*s++);
1500 if (islower(c)) {
1501 if (!previous_is_cased)
1502 c = toupper(c);
1503 previous_is_cased = 1;
1504 } else if (isupper(c)) {
1505 if (previous_is_cased)
1506 c = tolower(c);
1507 previous_is_cased = 1;
1508 } else
1509 previous_is_cased = 0;
1510 *s_new++ = c;
1511 }
1512 return new;
1513}
1514
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515static char capitalize__doc__[] =
1516"S.capitalize() -> string\n\
1517\n\
1518Return a copy of the string S with only its first character\n\
1519capitalized.";
1520
1521static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001522string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001523{
1524 char *s = PyString_AS_STRING(self), *s_new;
1525 int i, n = PyString_GET_SIZE(self);
1526 PyObject *new;
1527
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528 new = PyString_FromStringAndSize(NULL, n);
1529 if (new == NULL)
1530 return NULL;
1531 s_new = PyString_AsString(new);
1532 if (0 < n) {
1533 int c = Py_CHARMASK(*s++);
1534 if (islower(c))
1535 *s_new = toupper(c);
1536 else
1537 *s_new = c;
1538 s_new++;
1539 }
1540 for (i = 1; i < n; i++) {
1541 int c = Py_CHARMASK(*s++);
1542 if (isupper(c))
1543 *s_new = tolower(c);
1544 else
1545 *s_new = c;
1546 s_new++;
1547 }
1548 return new;
1549}
1550
1551
1552static char count__doc__[] =
1553"S.count(sub[, start[, end]]) -> int\n\
1554\n\
1555Return the number of occurrences of substring sub in string\n\
1556S[start:end]. Optional arguments start and end are\n\
1557interpreted as in slice notation.";
1558
1559static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001560string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001562 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001563 int len = PyString_GET_SIZE(self), n;
1564 int i = 0, last = INT_MAX;
1565 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001566 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567
Guido van Rossumc6821402000-05-08 14:08:05 +00001568 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1569 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001571
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 if (PyString_Check(subobj)) {
1573 sub = PyString_AS_STRING(subobj);
1574 n = PyString_GET_SIZE(subobj);
1575 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001576#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001577 else if (PyUnicode_Check(subobj)) {
1578 int count;
1579 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1580 if (count == -1)
1581 return NULL;
1582 else
1583 return PyInt_FromLong((long) count);
1584 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001585#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001586 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1587 return NULL;
1588
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589 if (last > len)
1590 last = len;
1591 if (last < 0)
1592 last += len;
1593 if (last < 0)
1594 last = 0;
1595 if (i < 0)
1596 i += len;
1597 if (i < 0)
1598 i = 0;
1599 m = last + 1 - n;
1600 if (n == 0)
1601 return PyInt_FromLong((long) (m-i));
1602
1603 r = 0;
1604 while (i < m) {
1605 if (!memcmp(s+i, sub, n)) {
1606 r++;
1607 i += n;
1608 } else {
1609 i++;
1610 }
1611 }
1612 return PyInt_FromLong((long) r);
1613}
1614
1615
1616static char swapcase__doc__[] =
1617"S.swapcase() -> string\n\
1618\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001619Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620converted to lowercase and vice versa.";
1621
1622static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001623string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624{
1625 char *s = PyString_AS_STRING(self), *s_new;
1626 int i, n = PyString_GET_SIZE(self);
1627 PyObject *new;
1628
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 new = PyString_FromStringAndSize(NULL, n);
1630 if (new == NULL)
1631 return NULL;
1632 s_new = PyString_AsString(new);
1633 for (i = 0; i < n; i++) {
1634 int c = Py_CHARMASK(*s++);
1635 if (islower(c)) {
1636 *s_new = toupper(c);
1637 }
1638 else if (isupper(c)) {
1639 *s_new = tolower(c);
1640 }
1641 else
1642 *s_new = c;
1643 s_new++;
1644 }
1645 return new;
1646}
1647
1648
1649static char translate__doc__[] =
1650"S.translate(table [,deletechars]) -> string\n\
1651\n\
1652Return a copy of the string S, where all characters occurring\n\
1653in the optional argument deletechars are removed, and the\n\
1654remaining characters have been mapped through the given\n\
1655translation table, which must be a string of length 256.";
1656
1657static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001658string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001659{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001660 register char *input, *output;
1661 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662 register int i, c, changed = 0;
1663 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001664 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665 int inlen, tablen, dellen = 0;
1666 PyObject *result;
1667 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669
Guido van Rossum4c08d552000-03-10 22:55:18 +00001670 if (!PyArg_ParseTuple(args, "O|O:translate",
1671 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673
1674 if (PyString_Check(tableobj)) {
1675 table1 = PyString_AS_STRING(tableobj);
1676 tablen = PyString_GET_SIZE(tableobj);
1677 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001678#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001680 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 parameter; instead a mapping to None will cause characters
1682 to be deleted. */
1683 if (delobj != NULL) {
1684 PyErr_SetString(PyExc_TypeError,
1685 "deletions are implemented differently for unicode");
1686 return NULL;
1687 }
1688 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1689 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001690#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001691 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693
1694 if (delobj != NULL) {
1695 if (PyString_Check(delobj)) {
1696 del_table = PyString_AS_STRING(delobj);
1697 dellen = PyString_GET_SIZE(delobj);
1698 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001699#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001700 else if (PyUnicode_Check(delobj)) {
1701 PyErr_SetString(PyExc_TypeError,
1702 "deletions are implemented differently for unicode");
1703 return NULL;
1704 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001705#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001706 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1707 return NULL;
1708
1709 if (tablen != 256) {
1710 PyErr_SetString(PyExc_ValueError,
1711 "translation table must be 256 characters long");
1712 return NULL;
1713 }
1714 }
1715 else {
1716 del_table = NULL;
1717 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718 }
1719
1720 table = table1;
1721 inlen = PyString_Size(input_obj);
1722 result = PyString_FromStringAndSize((char *)NULL, inlen);
1723 if (result == NULL)
1724 return NULL;
1725 output_start = output = PyString_AsString(result);
1726 input = PyString_AsString(input_obj);
1727
1728 if (dellen == 0) {
1729 /* If no deletions are required, use faster code */
1730 for (i = inlen; --i >= 0; ) {
1731 c = Py_CHARMASK(*input++);
1732 if (Py_CHARMASK((*output++ = table[c])) != c)
1733 changed = 1;
1734 }
1735 if (changed)
1736 return result;
1737 Py_DECREF(result);
1738 Py_INCREF(input_obj);
1739 return input_obj;
1740 }
1741
1742 for (i = 0; i < 256; i++)
1743 trans_table[i] = Py_CHARMASK(table[i]);
1744
1745 for (i = 0; i < dellen; i++)
1746 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1747
1748 for (i = inlen; --i >= 0; ) {
1749 c = Py_CHARMASK(*input++);
1750 if (trans_table[c] != -1)
1751 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1752 continue;
1753 changed = 1;
1754 }
1755 if (!changed) {
1756 Py_DECREF(result);
1757 Py_INCREF(input_obj);
1758 return input_obj;
1759 }
1760 /* Fix the size of the resulting string */
1761 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1762 return NULL;
1763 return result;
1764}
1765
1766
1767/* What follows is used for implementing replace(). Perry Stoll. */
1768
/*
   mymemfind

   Memory-block counterpart of strstr().

   Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
   bytes at PAT.  Returns the index into MEM of that occurrence, or -1
   when there is none.  A pattern longer than MEM is never found, so -1
   is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* last index at which a whole copy of PAT still fits inside MEM;
       negative when PAT is longer than MEM */
    const int last_start = len - pat_len;
    register int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before paying for the full comparison */
        if (mem[pos] == pat[0] && !memcmp(mem + pos, pat, pat_len))
            return pos;
        pos++;
    }
    return -1;
}
1794
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right and resuming each search just past the previous match.
   For example mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
   also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int matches = 0;
    register int hit;

    for (; len >= 0; matches++) {
        int consumed;

        hit = mymemfind(mem, len, pat, pat_len);
        if (hit == -1)
            break;
        /* drop everything up to and including this match */
        consumed = hit + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return matches;
}
1818
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means no limit.

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there are no occurrences of PAT in STR (or COUNT is 0),
   then the original string is returned.  Otherwise, a new string is
   allocated here with PyMem_MALLOC and returned; the caller releases it.
   The new string is NOT NUL-terminated unless it is empty; use out_len.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (no memory, or the result length would
       not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Only a growing replacement can overflow: when SUB is not longer
       than PAT the result is no longer than the input.  Refuse instead
       of letting the signed multiplication/addition below wrap around
       and under-allocate the output buffer. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1903
1904
1905static char replace__doc__[] =
1906"S.replace (old, new[, maxsplit]) -> string\n\
1907\n\
1908Return a copy of string S with all occurrences of substring\n\
1909old replaced by new. If the optional argument maxsplit is\n\
1910given, only the first maxsplit occurrences are replaced.";
1911
1912static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001913string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001915 const char *str = PyString_AS_STRING(self), *sub, *repl;
1916 char *new_s;
1917 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1918 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001920 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921
Guido van Rossum4c08d552000-03-10 22:55:18 +00001922 if (!PyArg_ParseTuple(args, "OO|i:replace",
1923 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925
1926 if (PyString_Check(subobj)) {
1927 sub = PyString_AS_STRING(subobj);
1928 sub_len = PyString_GET_SIZE(subobj);
1929 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001930#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001931 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001932 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001933 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001934#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001935 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1936 return NULL;
1937
1938 if (PyString_Check(replobj)) {
1939 repl = PyString_AS_STRING(replobj);
1940 repl_len = PyString_GET_SIZE(replobj);
1941 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001942#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001944 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001945 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001946#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001947 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1948 return NULL;
1949
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001950 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001951 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952 return NULL;
1953 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001954 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 if (new_s == NULL) {
1956 PyErr_NoMemory();
1957 return NULL;
1958 }
1959 if (out_len == -1) {
1960 /* we're returning another reference to self */
1961 new = (PyObject*)self;
1962 Py_INCREF(new);
1963 }
1964 else {
1965 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001966 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967 }
1968 return new;
1969}
1970
1971
1972static char startswith__doc__[] =
1973"S.startswith(prefix[, start[, end]]) -> int\n\
1974\n\
1975Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1976optional start, test S beginning at that position. With optional end, stop\n\
1977comparing S at that position.";
1978
1979static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001980string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001982 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001984 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985 int plen;
1986 int start = 0;
1987 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001988 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989
Guido van Rossumc6821402000-05-08 14:08:05 +00001990 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1991 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001992 return NULL;
1993 if (PyString_Check(subobj)) {
1994 prefix = PyString_AS_STRING(subobj);
1995 plen = PyString_GET_SIZE(subobj);
1996 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001997#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001998 else if (PyUnicode_Check(subobj)) {
1999 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002000 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002001 subobj, start, end, -1);
2002 if (rc == -1)
2003 return NULL;
2004 else
2005 return PyInt_FromLong((long) rc);
2006 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002007#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002008 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009 return NULL;
2010
2011 /* adopt Java semantics for index out of range. it is legal for
2012 * offset to be == plen, but this only returns true if prefix is
2013 * the empty string.
2014 */
2015 if (start < 0 || start+plen > len)
2016 return PyInt_FromLong(0);
2017
2018 if (!memcmp(str+start, prefix, plen)) {
2019 /* did the match end after the specified end? */
2020 if (end < 0)
2021 return PyInt_FromLong(1);
2022 else if (end - start < plen)
2023 return PyInt_FromLong(0);
2024 else
2025 return PyInt_FromLong(1);
2026 }
2027 else return PyInt_FromLong(0);
2028}
2029
2030
2031static char endswith__doc__[] =
2032"S.endswith(suffix[, start[, end]]) -> int\n\
2033\n\
2034Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2035optional start, test S beginning at that position. With optional end, stop\n\
2036comparing S at that position.";
2037
2038static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002039string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002041 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 const char* suffix;
2044 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 int start = 0;
2046 int end = -1;
2047 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049
Guido van Rossumc6821402000-05-08 14:08:05 +00002050 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2051 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 return NULL;
2053 if (PyString_Check(subobj)) {
2054 suffix = PyString_AS_STRING(subobj);
2055 slen = PyString_GET_SIZE(subobj);
2056 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002057#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002058 else if (PyUnicode_Check(subobj)) {
2059 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002060 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002061 subobj, start, end, +1);
2062 if (rc == -1)
2063 return NULL;
2064 else
2065 return PyInt_FromLong((long) rc);
2066 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002067#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002068 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069 return NULL;
2070
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072 return PyInt_FromLong(0);
2073
2074 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002075 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076
Guido van Rossum4c08d552000-03-10 22:55:18 +00002077 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078 return PyInt_FromLong(1);
2079 else return PyInt_FromLong(0);
2080}
2081
2082
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002083static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002084"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002085\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002086Encodes S using the codec registered for encoding. encoding defaults\n\
2087to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002088handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2089a ValueError. Other possible values are 'ignore' and 'replace'.";
2090
2091static PyObject *
2092string_encode(PyStringObject *self, PyObject *args)
2093{
2094 char *encoding = NULL;
2095 char *errors = NULL;
2096 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2097 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002098 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2099}
2100
2101
2102static char decode__doc__[] =
2103"S.decode([encoding[,errors]]) -> object\n\
2104\n\
2105Decodes S using the codec registered for encoding. encoding defaults\n\
2106to the default encoding. errors may be given to set a different error\n\
2107handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2108a ValueError. Other possible values are 'ignore' and 'replace'.";
2109
2110static PyObject *
2111string_decode(PyStringObject *self, PyObject *args)
2112{
2113 char *encoding = NULL;
2114 char *errors = NULL;
2115 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2116 return NULL;
2117 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002118}
2119
2120
Guido van Rossum4c08d552000-03-10 22:55:18 +00002121static char expandtabs__doc__[] =
2122"S.expandtabs([tabsize]) -> string\n\
2123\n\
2124Return a copy of S where all tab characters are expanded using spaces.\n\
2125If tabsize is not given, a tab size of 8 characters is assumed.";
2126
2127static PyObject*
2128string_expandtabs(PyStringObject *self, PyObject *args)
2129{
2130 const char *e, *p;
2131 char *q;
2132 int i, j;
2133 PyObject *u;
2134 int tabsize = 8;
2135
2136 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2137 return NULL;
2138
Thomas Wouters7e474022000-07-16 12:04:32 +00002139 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002140 i = j = 0;
2141 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2142 for (p = PyString_AS_STRING(self); p < e; p++)
2143 if (*p == '\t') {
2144 if (tabsize > 0)
2145 j += tabsize - (j % tabsize);
2146 }
2147 else {
2148 j++;
2149 if (*p == '\n' || *p == '\r') {
2150 i += j;
2151 j = 0;
2152 }
2153 }
2154
2155 /* Second pass: create output string and fill it */
2156 u = PyString_FromStringAndSize(NULL, i + j);
2157 if (!u)
2158 return NULL;
2159
2160 j = 0;
2161 q = PyString_AS_STRING(u);
2162
2163 for (p = PyString_AS_STRING(self); p < e; p++)
2164 if (*p == '\t') {
2165 if (tabsize > 0) {
2166 i = tabsize - (j % tabsize);
2167 j += i;
2168 while (i--)
2169 *q++ = ' ';
2170 }
2171 }
2172 else {
2173 j++;
2174 *q++ = *p;
2175 if (*p == '\n' || *p == '\r')
2176 j = 0;
2177 }
2178
2179 return u;
2180}
2181
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002182static
2183PyObject *pad(PyStringObject *self,
2184 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185 int right,
2186 char fill)
2187{
2188 PyObject *u;
2189
2190 if (left < 0)
2191 left = 0;
2192 if (right < 0)
2193 right = 0;
2194
2195 if (left == 0 && right == 0) {
2196 Py_INCREF(self);
2197 return (PyObject *)self;
2198 }
2199
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002200 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201 left + PyString_GET_SIZE(self) + right);
2202 if (u) {
2203 if (left)
2204 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002205 memcpy(PyString_AS_STRING(u) + left,
2206 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207 PyString_GET_SIZE(self));
2208 if (right)
2209 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2210 fill, right);
2211 }
2212
2213 return u;
2214}
2215
2216static char ljust__doc__[] =
2217"S.ljust(width) -> string\n\
2218\n\
2219Return S left justified in a string of length width. Padding is\n\
2220done using spaces.";
2221
2222static PyObject *
2223string_ljust(PyStringObject *self, PyObject *args)
2224{
2225 int width;
2226 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2227 return NULL;
2228
2229 if (PyString_GET_SIZE(self) >= width) {
2230 Py_INCREF(self);
2231 return (PyObject*) self;
2232 }
2233
2234 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2235}
2236
2237
2238static char rjust__doc__[] =
2239"S.rjust(width) -> string\n\
2240\n\
2241Return S right justified in a string of length width. Padding is\n\
2242done using spaces.";
2243
2244static PyObject *
2245string_rjust(PyStringObject *self, PyObject *args)
2246{
2247 int width;
2248 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2249 return NULL;
2250
2251 if (PyString_GET_SIZE(self) >= width) {
2252 Py_INCREF(self);
2253 return (PyObject*) self;
2254 }
2255
2256 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2257}
2258
2259
2260static char center__doc__[] =
2261"S.center(width) -> string\n\
2262\n\
2263Return S centered in a string of length width. Padding is done\n\
2264using spaces.";
2265
2266static PyObject *
2267string_center(PyStringObject *self, PyObject *args)
2268{
2269 int marg, left;
2270 int width;
2271
2272 if (!PyArg_ParseTuple(args, "i:center", &width))
2273 return NULL;
2274
2275 if (PyString_GET_SIZE(self) >= width) {
2276 Py_INCREF(self);
2277 return (PyObject*) self;
2278 }
2279
2280 marg = width - PyString_GET_SIZE(self);
2281 left = marg / 2 + (marg & width & 1);
2282
2283 return pad(self, left, marg - left, ' ');
2284}
2285
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).

   Left-pads self with '0' up to width.  If the original text began with
   a '+' or '-' sign, the sign is moved in front of the padding.  A
   string that is already at least width long is returned unchanged. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    char *text;
    PyObject *padded;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    zeros = width - PyString_GET_SIZE(self);
    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* text[zeros] is the first character of the original string */
    text = PyString_AS_STRING(padded);
    switch (text[zeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        text[0] = text[zeros];
        text[zeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
2325
2326static char isspace__doc__[] =
2327"S.isspace() -> int\n\
2328\n\
2329Return 1 if there are only whitespace characters in S,\n\
23300 otherwise.";
2331
2332static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002333string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002334{
Fred Drakeba096332000-07-09 07:04:36 +00002335 register const unsigned char *p
2336 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002337 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002338
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 /* Shortcut for single character strings */
2340 if (PyString_GET_SIZE(self) == 1 &&
2341 isspace(*p))
2342 return PyInt_FromLong(1);
2343
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002344 /* Special case for empty strings */
2345 if (PyString_GET_SIZE(self) == 0)
2346 return PyInt_FromLong(0);
2347
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348 e = p + PyString_GET_SIZE(self);
2349 for (; p < e; p++) {
2350 if (!isspace(*p))
2351 return PyInt_FromLong(0);
2352 }
2353 return PyInt_FromLong(1);
2354}
2355
2356
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002357static char isalpha__doc__[] =
2358"S.isalpha() -> int\n\
2359\n\
2360Return 1 if all characters in S are alphabetic\n\
2361and there is at least one character in S, 0 otherwise.";
2362
2363static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002364string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002365{
Fred Drakeba096332000-07-09 07:04:36 +00002366 register const unsigned char *p
2367 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002368 register const unsigned char *e;
2369
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002370 /* Shortcut for single character strings */
2371 if (PyString_GET_SIZE(self) == 1 &&
2372 isalpha(*p))
2373 return PyInt_FromLong(1);
2374
2375 /* Special case for empty strings */
2376 if (PyString_GET_SIZE(self) == 0)
2377 return PyInt_FromLong(0);
2378
2379 e = p + PyString_GET_SIZE(self);
2380 for (; p < e; p++) {
2381 if (!isalpha(*p))
2382 return PyInt_FromLong(0);
2383 }
2384 return PyInt_FromLong(1);
2385}
2386
2387
2388static char isalnum__doc__[] =
2389"S.isalnum() -> int\n\
2390\n\
2391Return 1 if all characters in S are alphanumeric\n\
2392and there is at least one character in S, 0 otherwise.";
2393
2394static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002395string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002396{
Fred Drakeba096332000-07-09 07:04:36 +00002397 register const unsigned char *p
2398 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002399 register const unsigned char *e;
2400
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002401 /* Shortcut for single character strings */
2402 if (PyString_GET_SIZE(self) == 1 &&
2403 isalnum(*p))
2404 return PyInt_FromLong(1);
2405
2406 /* Special case for empty strings */
2407 if (PyString_GET_SIZE(self) == 0)
2408 return PyInt_FromLong(0);
2409
2410 e = p + PyString_GET_SIZE(self);
2411 for (; p < e; p++) {
2412 if (!isalnum(*p))
2413 return PyInt_FromLong(0);
2414 }
2415 return PyInt_FromLong(1);
2416}
2417
2418
Guido van Rossum4c08d552000-03-10 22:55:18 +00002419static char isdigit__doc__[] =
2420"S.isdigit() -> int\n\
2421\n\
2422Return 1 if there are only digit characters in S,\n\
24230 otherwise.";
2424
2425static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002426string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427{
Fred Drakeba096332000-07-09 07:04:36 +00002428 register const unsigned char *p
2429 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002430 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002431
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 /* Shortcut for single character strings */
2433 if (PyString_GET_SIZE(self) == 1 &&
2434 isdigit(*p))
2435 return PyInt_FromLong(1);
2436
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002437 /* Special case for empty strings */
2438 if (PyString_GET_SIZE(self) == 0)
2439 return PyInt_FromLong(0);
2440
Guido van Rossum4c08d552000-03-10 22:55:18 +00002441 e = p + PyString_GET_SIZE(self);
2442 for (; p < e; p++) {
2443 if (!isdigit(*p))
2444 return PyInt_FromLong(0);
2445 }
2446 return PyInt_FromLong(1);
2447}
2448
2449
2450static char islower__doc__[] =
2451"S.islower() -> int\n\
2452\n\
2453Return 1 if all cased characters in S are lowercase and there is\n\
2454at least one cased character in S, 0 otherwise.";
2455
2456static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002457string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002458{
Fred Drakeba096332000-07-09 07:04:36 +00002459 register const unsigned char *p
2460 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002461 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002462 int cased;
2463
Guido van Rossum4c08d552000-03-10 22:55:18 +00002464 /* Shortcut for single character strings */
2465 if (PyString_GET_SIZE(self) == 1)
2466 return PyInt_FromLong(islower(*p) != 0);
2467
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002468 /* Special case for empty strings */
2469 if (PyString_GET_SIZE(self) == 0)
2470 return PyInt_FromLong(0);
2471
Guido van Rossum4c08d552000-03-10 22:55:18 +00002472 e = p + PyString_GET_SIZE(self);
2473 cased = 0;
2474 for (; p < e; p++) {
2475 if (isupper(*p))
2476 return PyInt_FromLong(0);
2477 else if (!cased && islower(*p))
2478 cased = 1;
2479 }
2480 return PyInt_FromLong(cased);
2481}
2482
2483
2484static char isupper__doc__[] =
2485"S.isupper() -> int\n\
2486\n\
2487Return 1 if all cased characters in S are uppercase and there is\n\
2488at least one cased character in S, 0 otherwise.";
2489
2490static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002491string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002492{
Fred Drakeba096332000-07-09 07:04:36 +00002493 register const unsigned char *p
2494 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002495 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496 int cased;
2497
Guido van Rossum4c08d552000-03-10 22:55:18 +00002498 /* Shortcut for single character strings */
2499 if (PyString_GET_SIZE(self) == 1)
2500 return PyInt_FromLong(isupper(*p) != 0);
2501
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002502 /* Special case for empty strings */
2503 if (PyString_GET_SIZE(self) == 0)
2504 return PyInt_FromLong(0);
2505
Guido van Rossum4c08d552000-03-10 22:55:18 +00002506 e = p + PyString_GET_SIZE(self);
2507 cased = 0;
2508 for (; p < e; p++) {
2509 if (islower(*p))
2510 return PyInt_FromLong(0);
2511 else if (!cased && isupper(*p))
2512 cased = 1;
2513 }
2514 return PyInt_FromLong(cased);
2515}
2516
2517
2518static char istitle__doc__[] =
2519"S.istitle() -> int\n\
2520\n\
2521Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2522may only follow uncased characters and lowercase characters only cased\n\
2523ones. Return 0 otherwise.";
2524
2525static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002526string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527{
Fred Drakeba096332000-07-09 07:04:36 +00002528 register const unsigned char *p
2529 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002530 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002531 int cased, previous_is_cased;
2532
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533 /* Shortcut for single character strings */
2534 if (PyString_GET_SIZE(self) == 1)
2535 return PyInt_FromLong(isupper(*p) != 0);
2536
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002537 /* Special case for empty strings */
2538 if (PyString_GET_SIZE(self) == 0)
2539 return PyInt_FromLong(0);
2540
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541 e = p + PyString_GET_SIZE(self);
2542 cased = 0;
2543 previous_is_cased = 0;
2544 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002545 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546
2547 if (isupper(ch)) {
2548 if (previous_is_cased)
2549 return PyInt_FromLong(0);
2550 previous_is_cased = 1;
2551 cased = 1;
2552 }
2553 else if (islower(ch)) {
2554 if (!previous_is_cased)
2555 return PyInt_FromLong(0);
2556 previous_is_cased = 1;
2557 cased = 1;
2558 }
2559 else
2560 previous_is_cased = 0;
2561 }
2562 return PyInt_FromLong(cased);
2563}
2564
2565
/* Docstring exposed as str.splitlines.__doc__ via string_methods[].
   The optional argument is written with balanced brackets. */
static char splitlines__doc__[] =
	"S.splitlines([keepends]) -> list of strings\n"
	"\n"
	"Return a list of the lines in S, breaking at line boundaries.\n"
	"Line breaks are not included in the resulting list unless keepends\n"
	"is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002572
2573#define SPLIT_APPEND(data, left, right) \
2574 str = PyString_FromStringAndSize(data + left, right - left); \
2575 if (!str) \
2576 goto onError; \
2577 if (PyList_Append(list, str)) { \
2578 Py_DECREF(str); \
2579 goto onError; \
2580 } \
2581 else \
2582 Py_DECREF(str);
2583
2584static PyObject*
2585string_splitlines(PyStringObject *self, PyObject *args)
2586{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002587 register int i;
2588 register int j;
2589 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002590 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002591 PyObject *list;
2592 PyObject *str;
2593 char *data;
2594
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002595 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002596 return NULL;
2597
2598 data = PyString_AS_STRING(self);
2599 len = PyString_GET_SIZE(self);
2600
Guido van Rossum4c08d552000-03-10 22:55:18 +00002601 list = PyList_New(0);
2602 if (!list)
2603 goto onError;
2604
2605 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002606 int eol;
2607
Guido van Rossum4c08d552000-03-10 22:55:18 +00002608 /* Find a line and append it */
2609 while (i < len && data[i] != '\n' && data[i] != '\r')
2610 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611
2612 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002613 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614 if (i < len) {
2615 if (data[i] == '\r' && i + 1 < len &&
2616 data[i+1] == '\n')
2617 i += 2;
2618 else
2619 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002620 if (keepends)
2621 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002622 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002623 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 j = i;
2625 }
2626 if (j < len) {
2627 SPLIT_APPEND(data, j, len);
2628 }
2629
2630 return list;
2631
2632 onError:
2633 Py_DECREF(list);
2634 return NULL;
2635}
2636
2637#undef SPLIT_APPEND
2638
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002639
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002640static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002641string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642 /* Counterparts of the obsolete stropmodule functions; except
2643 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002644 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2645 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2646 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2647 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2648 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2649 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2650 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2651 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2652 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2653 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2654 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2655 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2656 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2657 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2658 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2659 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2660 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2661 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2662 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2663 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2664 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2665 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2666 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2667 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2668 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2669 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2670 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2671 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2672 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2673 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2674 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2675 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2676 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002677#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002678 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002679#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002680 {NULL, NULL} /* sentinel */
2681};
2682
2683static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002684string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002685{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002686 PyObject *x = NULL;
2687 static char *kwlist[] = {"object", 0};
2688
2689 assert(type == &PyString_Type);
2690 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2691 return NULL;
2692 if (x == NULL)
2693 return PyString_FromString("");
2694 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002695}
2696
/* Docstring of the str type itself (tp_doc of PyString_Type). */
static char string_doc[] =
	"str(object) -> string\n"
	"\n"
	"Return a nice string representation of the object.\n"
	"If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002702
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002703PyTypeObject PyString_Type = {
2704 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002705 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002706 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002707 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002708 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002709 (destructor)string_dealloc, /* tp_dealloc */
2710 (printfunc)string_print, /* tp_print */
2711 0, /* tp_getattr */
2712 0, /* tp_setattr */
2713 0, /* tp_compare */
2714 (reprfunc)string_repr, /* tp_repr */
2715 0, /* tp_as_number */
2716 &string_as_sequence, /* tp_as_sequence */
2717 0, /* tp_as_mapping */
2718 (hashfunc)string_hash, /* tp_hash */
2719 0, /* tp_call */
2720 (reprfunc)string_str, /* tp_str */
2721 PyObject_GenericGetAttr, /* tp_getattro */
2722 0, /* tp_setattro */
2723 &string_as_buffer, /* tp_as_buffer */
2724 Py_TPFLAGS_DEFAULT, /* tp_flags */
2725 string_doc, /* tp_doc */
2726 0, /* tp_traverse */
2727 0, /* tp_clear */
2728 (richcmpfunc)string_richcompare, /* tp_richcompare */
2729 0, /* tp_weaklistoffset */
2730 0, /* tp_iter */
2731 0, /* tp_iternext */
2732 string_methods, /* tp_methods */
2733 0, /* tp_members */
2734 0, /* tp_getset */
2735 0, /* tp_base */
2736 0, /* tp_dict */
2737 0, /* tp_descr_get */
2738 0, /* tp_descr_set */
2739 0, /* tp_dictoffset */
2740 0, /* tp_init */
2741 0, /* tp_alloc */
2742 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002743};
2744
2745void
Fred Drakeba096332000-07-09 07:04:36 +00002746PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002747{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002748 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002749 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002750 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002751 if (w == NULL || !PyString_Check(*pv)) {
2752 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002753 *pv = NULL;
2754 return;
2755 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 v = string_concat((PyStringObject *) *pv, w);
2757 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002758 *pv = v;
2759}
2760
Guido van Rossum013142a1994-08-30 08:19:36 +00002761void
Fred Drakeba096332000-07-09 07:04:36 +00002762PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002763{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002764 PyString_Concat(pv, w);
2765 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002766}
2767
2768
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002769/* The following function breaks the notion that strings are immutable:
2770 it changes the size of a string. We get away with this only if there
2771 is only one module referencing the object. You can also think of it
2772 as creating a new string object and destroying the old one, only
2773 more efficiently. In any case, don't use this if the string may
2774 already be known to some other part of the code... */
2775
2776int
Fred Drakeba096332000-07-09 07:04:36 +00002777_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002778{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002779 register PyObject *v;
2780 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002781 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002782 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002783 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002784 Py_DECREF(v);
2785 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002786 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002787 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002788 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002789#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002790 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002791#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002792 _Py_ForgetReference(v);
2793 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002794 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002796 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002797 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002799 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002800 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002801 _Py_NewReference(*pv);
2802 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002803 sv->ob_size = newsize;
2804 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002805 return 0;
2806}
Guido van Rossume5372401993-03-16 12:15:04 +00002807
2808/* Helpers for formatstring */
2809
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002810static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002811getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002812{
2813 int argidx = *p_argidx;
2814 if (argidx < arglen) {
2815 (*p_argidx)++;
2816 if (arglen < 0)
2817 return args;
2818 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002819 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002820 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002821 PyErr_SetString(PyExc_TypeError,
2822 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002823 return NULL;
2824}
2825
/* Format flag bits; each one corresponds to a flag character that may
 * follow '%' in a format string.
 */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
2838
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002839static int
Fred Drakeba096332000-07-09 07:04:36 +00002840formatfloat(char *buf, size_t buflen, int flags,
2841 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002842{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002843 /* fmt = '%#.' + `prec` + `type`
2844 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002845 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002846 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002847 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002848 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002849 if (prec < 0)
2850 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002851 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2852 type = 'g';
2853 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002854 /* worst case length calc to ensure no buffer overrun:
2855 fmt = %#.<prec>g
2856 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002857 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002858 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2859 If prec=0 the effective precision is 1 (the leading digit is
2860 always given), therefore increase by one to 10+prec. */
2861 if (buflen <= (size_t)10 + (size_t)prec) {
2862 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002863 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002864 return -1;
2865 }
Guido van Rossume5372401993-03-16 12:15:04 +00002866 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002867 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002868}
2869
Tim Peters38fd5b62000-09-21 05:43:11 +00002870/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2871 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2872 * Python's regular ints.
2873 * Return value: a new PyString*, or NULL if error.
2874 * . *pbuf is set to point into it,
2875 * *plen set to the # of chars following that.
2876 * Caller must decref it when done using pbuf.
2877 * The string starting at *pbuf is of the form
2878 * "-"? ("0x" | "0X")? digit+
2879 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002880 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002881 * There will be at least prec digits, zero-filled on the left if
2882 * necessary to get that many.
2883 * val object to be converted
2884 * flags bitmask of format flags; only F_ALT is looked at
2885 * prec minimum number of digits; 0-fill on left if needed
2886 * type a character in [duoxX]; u acts the same as d
2887 *
2888 * CAUTION: o, x and X conversions on regular ints can never
2889 * produce a '-' sign, but can for Python's unbounded ints.
2890 */
2891PyObject*
2892_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2893 char **pbuf, int *plen)
2894{
2895 PyObject *result = NULL;
2896 char *buf;
2897 int i;
2898 int sign; /* 1 if '-', else 0 */
2899 int len; /* number of characters */
2900 int numdigits; /* len == numnondigits + numdigits */
2901 int numnondigits = 0;
2902
2903 switch (type) {
2904 case 'd':
2905 case 'u':
2906 result = val->ob_type->tp_str(val);
2907 break;
2908 case 'o':
2909 result = val->ob_type->tp_as_number->nb_oct(val);
2910 break;
2911 case 'x':
2912 case 'X':
2913 numnondigits = 2;
2914 result = val->ob_type->tp_as_number->nb_hex(val);
2915 break;
2916 default:
2917 assert(!"'type' not in [duoxX]");
2918 }
2919 if (!result)
2920 return NULL;
2921
2922 /* To modify the string in-place, there can only be one reference. */
2923 if (result->ob_refcnt != 1) {
2924 PyErr_BadInternalCall();
2925 return NULL;
2926 }
2927 buf = PyString_AsString(result);
2928 len = PyString_Size(result);
2929 if (buf[len-1] == 'L') {
2930 --len;
2931 buf[len] = '\0';
2932 }
2933 sign = buf[0] == '-';
2934 numnondigits += sign;
2935 numdigits = len - numnondigits;
2936 assert(numdigits > 0);
2937
Tim Petersfff53252001-04-12 18:38:48 +00002938 /* Get rid of base marker unless F_ALT */
2939 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002940 /* Need to skip 0x, 0X or 0. */
2941 int skipped = 0;
2942 switch (type) {
2943 case 'o':
2944 assert(buf[sign] == '0');
2945 /* If 0 is only digit, leave it alone. */
2946 if (numdigits > 1) {
2947 skipped = 1;
2948 --numdigits;
2949 }
2950 break;
2951 case 'x':
2952 case 'X':
2953 assert(buf[sign] == '0');
2954 assert(buf[sign + 1] == 'x');
2955 skipped = 2;
2956 numnondigits -= 2;
2957 break;
2958 }
2959 if (skipped) {
2960 buf += skipped;
2961 len -= skipped;
2962 if (sign)
2963 buf[0] = '-';
2964 }
2965 assert(len == numnondigits + numdigits);
2966 assert(numdigits > 0);
2967 }
2968
2969 /* Fill with leading zeroes to meet minimum width. */
2970 if (prec > numdigits) {
2971 PyObject *r1 = PyString_FromStringAndSize(NULL,
2972 numnondigits + prec);
2973 char *b1;
2974 if (!r1) {
2975 Py_DECREF(result);
2976 return NULL;
2977 }
2978 b1 = PyString_AS_STRING(r1);
2979 for (i = 0; i < numnondigits; ++i)
2980 *b1++ = *buf++;
2981 for (i = 0; i < prec - numdigits; i++)
2982 *b1++ = '0';
2983 for (i = 0; i < numdigits; i++)
2984 *b1++ = *buf++;
2985 *b1 = '\0';
2986 Py_DECREF(result);
2987 result = r1;
2988 buf = PyString_AS_STRING(result);
2989 len = numnondigits + prec;
2990 }
2991
2992 /* Fix up case for hex conversions. */
2993 switch (type) {
2994 case 'x':
2995 /* Need to convert all upper case letters to lower case. */
2996 for (i = 0; i < len; i++)
2997 if (buf[i] >= 'A' && buf[i] <= 'F')
2998 buf[i] += 'a'-'A';
2999 break;
3000 case 'X':
3001 /* Need to convert 0x to 0X (and -0x to -0X). */
3002 if (buf[sign + 1] == 'x')
3003 buf[sign + 1] = 'X';
3004 break;
3005 }
3006 *pbuf = buf;
3007 *plen = len;
3008 return result;
3009}
3010
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003011static int
Fred Drakeba096332000-07-09 07:04:36 +00003012formatint(char *buf, size_t buflen, int flags,
3013 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003014{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003015 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003016 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3017 + 1 + 1 = 24 */
3018 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003019 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003020 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003021 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003022 if (prec < 0)
3023 prec = 1;
3024 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003025 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003026 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003027 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003028 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003029 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003030 return -1;
3031 }
Guido van Rossume5372401993-03-16 12:15:04 +00003032 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003033 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3034 * but we want it (for consistency with other %#x conversions, and
3035 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003036 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3037 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3038 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003039 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003040 if (x == 0 &&
3041 (flags & F_ALT) &&
3042 (type == 'x' || type == 'X') &&
3043 buf[1] != (char)type) /* this last always true under std C */
3044 {
Tim Petersfff53252001-04-12 18:38:48 +00003045 memmove(buf+2, buf, strlen(buf) + 1);
3046 buf[0] = '0';
3047 buf[1] = (char)type;
3048 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003049 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003050}
3051
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003052static int
Fred Drakeba096332000-07-09 07:04:36 +00003053formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003054{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003055 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003056 if (PyString_Check(v)) {
3057 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003058 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003059 }
3060 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003061 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003062 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003063 }
3064 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003065 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003066}
3067
Guido van Rossum013142a1994-08-30 08:19:36 +00003068
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003069/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3070
3071 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3072 chars are formatted. XXX This is a magic number. Each formatting
3073 routine does bounds checking to ensure no overflow, but a better
3074 solution may be to malloc a buffer of appropriate size for each
3075 format. For now, the current solution is sufficient.
3076*/
#define FORMATBUFLEN ((size_t)120)	/* size of the per-conversion scratch buffer */
Guido van Rossume5372401993-03-16 12:15:04 +00003078
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003079PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003080PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003081{
3082 char *fmt, *res;
3083 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003084 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003085 PyObject *result, *orig_args;
3086#ifdef Py_USING_UNICODE
3087 PyObject *v, *w;
3088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003089 PyObject *dict = NULL;
3090 if (format == NULL || !PyString_Check(format) || args == NULL) {
3091 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003092 return NULL;
3093 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003094 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 fmt = PyString_AsString(format);
3096 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003097 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003098 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003099 if (result == NULL)
3100 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003101 res = PyString_AsString(result);
3102 if (PyTuple_Check(args)) {
3103 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003104 argidx = 0;
3105 }
3106 else {
3107 arglen = -1;
3108 argidx = -2;
3109 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003110 if (args->ob_type->tp_as_mapping)
3111 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003112 while (--fmtcnt >= 0) {
3113 if (*fmt != '%') {
3114 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003115 rescnt = fmtcnt + 100;
3116 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003117 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003118 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003119 res = PyString_AsString(result)
3120 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003121 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003122 }
3123 *res++ = *fmt++;
3124 }
3125 else {
3126 /* Got a format specifier */
3127 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003128 int width = -1;
3129 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003130 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003131 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003132 PyObject *v = NULL;
3133 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003134 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003135 int sign;
3136 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003137 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003138#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003139 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003140 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003141#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003142
Guido van Rossumda9c2711996-12-05 21:58:58 +00003143 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003144 if (*fmt == '(') {
3145 char *keystart;
3146 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003147 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003148 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003149
3150 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003151 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003152 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003153 goto error;
3154 }
3155 ++fmt;
3156 --fmtcnt;
3157 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003158 /* Skip over balanced parentheses */
3159 while (pcount > 0 && --fmtcnt >= 0) {
3160 if (*fmt == ')')
3161 --pcount;
3162 else if (*fmt == '(')
3163 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003164 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003165 }
3166 keylen = fmt - keystart - 1;
3167 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003168 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003169 "incomplete format key");
3170 goto error;
3171 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003172 key = PyString_FromStringAndSize(keystart,
3173 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003174 if (key == NULL)
3175 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003176 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003177 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003178 args_owned = 0;
3179 }
3180 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003181 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003182 if (args == NULL) {
3183 goto error;
3184 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003185 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003186 arglen = -1;
3187 argidx = -2;
3188 }
Guido van Rossume5372401993-03-16 12:15:04 +00003189 while (--fmtcnt >= 0) {
3190 switch (c = *fmt++) {
3191 case '-': flags |= F_LJUST; continue;
3192 case '+': flags |= F_SIGN; continue;
3193 case ' ': flags |= F_BLANK; continue;
3194 case '#': flags |= F_ALT; continue;
3195 case '0': flags |= F_ZERO; continue;
3196 }
3197 break;
3198 }
3199 if (c == '*') {
3200 v = getnextarg(args, arglen, &argidx);
3201 if (v == NULL)
3202 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003203 if (!PyInt_Check(v)) {
3204 PyErr_SetString(PyExc_TypeError,
3205 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003206 goto error;
3207 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003208 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003209 if (width < 0) {
3210 flags |= F_LJUST;
3211 width = -width;
3212 }
Guido van Rossume5372401993-03-16 12:15:04 +00003213 if (--fmtcnt >= 0)
3214 c = *fmt++;
3215 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003216 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003217 width = c - '0';
3218 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003219 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003220 if (!isdigit(c))
3221 break;
3222 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003223 PyErr_SetString(
3224 PyExc_ValueError,
3225 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003226 goto error;
3227 }
3228 width = width*10 + (c - '0');
3229 }
3230 }
3231 if (c == '.') {
3232 prec = 0;
3233 if (--fmtcnt >= 0)
3234 c = *fmt++;
3235 if (c == '*') {
3236 v = getnextarg(args, arglen, &argidx);
3237 if (v == NULL)
3238 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003239 if (!PyInt_Check(v)) {
3240 PyErr_SetString(
3241 PyExc_TypeError,
3242 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003243 goto error;
3244 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003245 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003246 if (prec < 0)
3247 prec = 0;
3248 if (--fmtcnt >= 0)
3249 c = *fmt++;
3250 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003251 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003252 prec = c - '0';
3253 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003254 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003255 if (!isdigit(c))
3256 break;
3257 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003258 PyErr_SetString(
3259 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003260 "prec too big");
3261 goto error;
3262 }
3263 prec = prec*10 + (c - '0');
3264 }
3265 }
3266 } /* prec */
3267 if (fmtcnt >= 0) {
3268 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003269 if (--fmtcnt >= 0)
3270 c = *fmt++;
3271 }
3272 }
3273 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003274 PyErr_SetString(PyExc_ValueError,
3275 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003276 goto error;
3277 }
3278 if (c != '%') {
3279 v = getnextarg(args, arglen, &argidx);
3280 if (v == NULL)
3281 goto error;
3282 }
3283 sign = 0;
3284 fill = ' ';
3285 switch (c) {
3286 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003287 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003288 len = 1;
3289 break;
3290 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003291 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003292#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003293 if (PyUnicode_Check(v)) {
3294 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003295 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003296 goto unicode;
3297 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003298#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003299 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003300 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003301 else
3302 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003303 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003304 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003305 if (!PyString_Check(temp)) {
3306 PyErr_SetString(PyExc_TypeError,
3307 "%s argument has non-string str()");
3308 goto error;
3309 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003310 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003311 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003312 if (prec >= 0 && len > prec)
3313 len = prec;
3314 break;
3315 case 'i':
3316 case 'd':
3317 case 'u':
3318 case 'o':
3319 case 'x':
3320 case 'X':
3321 if (c == 'i')
3322 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003323 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003324 temp = _PyString_FormatLong(v, flags,
3325 prec, c, &pbuf, &len);
3326 if (!temp)
3327 goto error;
3328 /* unbounded ints can always produce
3329 a sign character! */
3330 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003331 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003332 else {
3333 pbuf = formatbuf;
3334 len = formatint(pbuf, sizeof(formatbuf),
3335 flags, prec, c, v);
3336 if (len < 0)
3337 goto error;
3338 /* only d conversion is signed */
3339 sign = c == 'd';
3340 }
3341 if (flags & F_ZERO)
3342 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003343 break;
3344 case 'e':
3345 case 'E':
3346 case 'f':
3347 case 'g':
3348 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003349 pbuf = formatbuf;
3350 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003351 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003352 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003353 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003354 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003355 fill = '0';
3356 break;
3357 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003358 pbuf = formatbuf;
3359 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003360 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003361 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003362 break;
3363 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003364 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003365 "unsupported format character '%c' (0x%x) "
3366 "at index %i",
3367 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003368 goto error;
3369 }
3370 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003371 if (*pbuf == '-' || *pbuf == '+') {
3372 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003373 len--;
3374 }
3375 else if (flags & F_SIGN)
3376 sign = '+';
3377 else if (flags & F_BLANK)
3378 sign = ' ';
3379 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003380 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003381 }
3382 if (width < len)
3383 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003384 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003385 reslen -= rescnt;
3386 rescnt = width + fmtcnt + 100;
3387 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003388 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003389 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003390 res = PyString_AsString(result)
3391 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003392 }
3393 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003394 if (fill != ' ')
3395 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003396 rescnt--;
3397 if (width > len)
3398 width--;
3399 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003400 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3401 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003402 assert(pbuf[1] == c);
3403 if (fill != ' ') {
3404 *res++ = *pbuf++;
3405 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003406 }
Tim Petersfff53252001-04-12 18:38:48 +00003407 rescnt -= 2;
3408 width -= 2;
3409 if (width < 0)
3410 width = 0;
3411 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003412 }
3413 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003414 do {
3415 --rescnt;
3416 *res++ = fill;
3417 } while (--width > len);
3418 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003419 if (fill == ' ') {
3420 if (sign)
3421 *res++ = sign;
3422 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003423 (c == 'x' || c == 'X')) {
3424 assert(pbuf[0] == '0');
3425 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003426 *res++ = *pbuf++;
3427 *res++ = *pbuf++;
3428 }
3429 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003430 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003431 res += len;
3432 rescnt -= len;
3433 while (--width >= len) {
3434 --rescnt;
3435 *res++ = ' ';
3436 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003437 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003438 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003439 "not all arguments converted");
3440 goto error;
3441 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003442 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003443 } /* '%' */
3444 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003445 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003446 PyErr_SetString(PyExc_TypeError,
3447 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003448 goto error;
3449 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003450 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003451 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003452 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003453 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003454 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003455
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003456#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003457 unicode:
3458 if (args_owned) {
3459 Py_DECREF(args);
3460 args_owned = 0;
3461 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003462 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003463 if (PyTuple_Check(orig_args) && argidx > 0) {
3464 PyObject *v;
3465 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3466 v = PyTuple_New(n);
3467 if (v == NULL)
3468 goto error;
3469 while (--n >= 0) {
3470 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3471 Py_INCREF(w);
3472 PyTuple_SET_ITEM(v, n, w);
3473 }
3474 args = v;
3475 } else {
3476 Py_INCREF(orig_args);
3477 args = orig_args;
3478 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003479 args_owned = 1;
3480 /* Take what we have of the result and let the Unicode formatting
3481 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003482 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003483 if (_PyString_Resize(&result, rescnt))
3484 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003485 fmtcnt = PyString_GET_SIZE(format) - \
3486 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003487 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3488 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003489 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003490 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003491 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003492 if (v == NULL)
3493 goto error;
3494 /* Paste what we have (result) to what the Unicode formatting
3495 function returned (v) and return the result (or error) */
3496 w = PyUnicode_Concat(result, v);
3497 Py_DECREF(result);
3498 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003499 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003500 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003501#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003502
Guido van Rossume5372401993-03-16 12:15:04 +00003503 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003504 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003505 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003506 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003507 }
Guido van Rossume5372401993-03-16 12:15:04 +00003508 return NULL;
3509}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003510
3511
3512#ifdef INTERN_STRINGS
3513
/* This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003526static PyObject *interned;
3527
3528void
Fred Drakeba096332000-07-09 07:04:36 +00003529PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003530{
3531 register PyStringObject *s = (PyStringObject *)(*p);
3532 PyObject *t;
3533 if (s == NULL || !PyString_Check(s))
3534 Py_FatalError("PyString_InternInPlace: strings only please!");
3535 if ((t = s->ob_sinterned) != NULL) {
3536 if (t == (PyObject *)s)
3537 return;
3538 Py_INCREF(t);
3539 *p = t;
3540 Py_DECREF(s);
3541 return;
3542 }
3543 if (interned == NULL) {
3544 interned = PyDict_New();
3545 if (interned == NULL)
3546 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003547 }
3548 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3549 Py_INCREF(t);
3550 *p = s->ob_sinterned = t;
3551 Py_DECREF(s);
3552 return;
3553 }
3554 t = (PyObject *)s;
3555 if (PyDict_SetItem(interned, t, t) == 0) {
3556 s->ob_sinterned = t;
3557 return;
3558 }
3559 PyErr_Clear();
3560}
3561
3562
3563PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003564PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003565{
3566 PyObject *s = PyString_FromString(cp);
3567 if (s == NULL)
3568 return NULL;
3569 PyString_InternInPlace(&s);
3570 return s;
3571}
3572
3573#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003574
3575void
Fred Drakeba096332000-07-09 07:04:36 +00003576PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003577{
3578 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003579 for (i = 0; i < UCHAR_MAX + 1; i++) {
3580 Py_XDECREF(characters[i]);
3581 characters[i] = NULL;
3582 }
3583#ifndef DONT_SHARE_SHORT_STRINGS
3584 Py_XDECREF(nullstring);
3585 nullstring = NULL;
3586#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003587#ifdef INTERN_STRINGS
3588 if (interned) {
3589 int pos, changed;
3590 PyObject *key, *value;
3591 do {
3592 changed = 0;
3593 pos = 0;
3594 while (PyDict_Next(interned, &pos, &key, &value)) {
3595 if (key->ob_refcnt == 2 && key == value) {
3596 PyDict_DelItem(interned, key);
3597 changed = 1;
3598 }
3599 }
3600 } while (changed);
3601 }
3602#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003603}
Barry Warsawa903ad982001-02-23 16:40:48 +00003604
3605#ifdef INTERN_STRINGS
3606void _Py_ReleaseInternedStrings(void)
3607{
3608 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003609 fprintf(stderr, "releasing interned strings\n");
3610 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003611 Py_DECREF(interned);
3612 interned = NULL;
3613 }
3614}
3615#endif /* INTERN_STRINGS */