blob: 7937b46a3d9454a45e9363fd71c9c3823c690672 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 For PyString_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000544 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545#ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
556
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
562
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
572 }
573#else
574 *p++ = *s++;
575#endif
576 continue;
577 }
578 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
583 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
604 }
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
647#ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "Unicode escapes not legal "
654 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#endif
658 default:
659 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
671}
672
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000673static int
674string_getsize(register PyObject *op)
675{
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
681}
682
683static /*const*/ char *
684string_getbuffer(register PyObject *op)
685{
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
691}
692
Guido van Rossumd7047b31995-01-02 19:07:15 +0000693int
Fred Drakeba096332000-07-09 07:04:36 +0000694PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (!PyString_Check(op))
697 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699}
700
701/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709int
710PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
713{
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
717 }
718
719 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
725 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000726 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000727#endif
728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747/* Methods */
748
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000749static int
Fred Drakeba096332000-07-09 07:04:36 +0000750string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
752 int i;
753 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000754 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000755
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000756 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
766 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000768#ifdef __VMS
769 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
770#else
771 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
772#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775
Thomas Wouters7e474022000-07-16 12:04:32 +0000776 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000777 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000778 if (memchr(op->ob_sval, '\'', op->ob_size) &&
779 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 quote = '"';
781
782 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 for (i = 0; i < op->ob_size; i++) {
784 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000789 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000791 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fprintf(fp, "\\r");
793 else if (c < ' ' || c >= 0x7f)
794 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000795 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000798 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000802PyObject *
803PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000805 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000806 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
807 PyObject *v;
808 if (newsize > INT_MAX) {
809 PyErr_SetString(PyExc_OverflowError,
810 "string is too large to make repr");
811 }
812 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000814 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 }
816 else {
817 register int i;
818 register char c;
819 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 int quote;
821
Thomas Wouters7e474022000-07-16 12:04:32 +0000822 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000824 if (smartquotes &&
825 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000826 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000827 quote = '"';
828
Tim Peters9161c8b2001-12-03 01:55:38 +0000829 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
839 *p++ = '\\', *p++ = 't';
840 else if (c == '\n')
841 *p++ = '\\', *p++ = 'n';
842 else if (c == '\r')
843 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 else if (c < ' ' || c >= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
847 function call). */
848 sprintf(p, "\\x%02x", c & 0xff);
849 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000850 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000851 else
852 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000857 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000858 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000859 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000860 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864string_repr(PyObject *op)
865{
866 return PyString_Repr(op, 1);
867}
868
869static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000870string_str(PyObject *s)
871{
Tim Petersc9933152001-10-16 20:18:24 +0000872 assert(PyString_Check(s));
873 if (PyString_CheckExact(s)) {
874 Py_INCREF(s);
875 return s;
876 }
877 else {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject *t = (PyStringObject *) s;
880 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
881 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000882}
883
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884static int
Fred Drakeba096332000-07-09 07:04:36 +0000885string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 return a->ob_size;
888}
889
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000891string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892{
893 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 register PyStringObject *op;
895 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000896#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000897 if (PyUnicode_Check(bb))
898 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000899#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000900 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000901 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000902 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 return NULL;
904 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000907 if ((a->ob_size == 0 || b->ob_size == 0) &&
908 PyString_CheckExact(a) && PyString_CheckExact(b)) {
909 if (a->ob_size == 0) {
910 Py_INCREF(bb);
911 return bb;
912 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 Py_INCREF(a);
914 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
916 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000917 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000919 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000920 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000922 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000924 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000925 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
926 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
927 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929#undef b
930}
931
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000933string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
935 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000936 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000937 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000938 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939 if (n < 0)
940 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000941 /* watch out for overflows: the size can overflow int,
942 * and the # of bytes needed can overflow size_t
943 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000944 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000945 if (n && size / n != a->ob_size) {
946 PyErr_SetString(PyExc_OverflowError,
947 "repeated string is too long");
948 return NULL;
949 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000950 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000951 Py_INCREF(a);
952 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000953 }
Tim Peters8f422462000-09-09 06:13:41 +0000954 nbytes = size * sizeof(char);
955 if (nbytes / sizeof(char) != (size_t)size ||
956 nbytes + sizeof(PyStringObject) <= nbytes) {
957 PyErr_SetString(PyExc_OverflowError,
958 "repeated string is too long");
959 return NULL;
960 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000961 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000962 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000963 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000965 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000966 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000967 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000968 for (i = 0; i < size; i += a->ob_size)
969 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
970 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000972}
973
974/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
975
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000977string_slice(register PyStringObject *a, register int i, register int j)
978 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979{
980 if (i < 0)
981 i = 0;
982 if (j < 0)
983 j = 0; /* Avoid signed/unsigned bug in next line */
984 if (j > a->ob_size)
985 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000986 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
987 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 Py_INCREF(a);
989 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990 }
991 if (j < i)
992 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994}
995
Guido van Rossum9284a572000-03-07 15:53:43 +0000996static int
Fred Drakeba096332000-07-09 07:04:36 +0000997string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000998{
Barry Warsaw817918c2002-08-06 16:58:21 +0000999 const char *lhs, *rhs, *end;
1000 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001001
1002 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001003#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001004 if (PyUnicode_Check(el))
1005 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001006#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001007 if (!PyString_Check(el)) {
1008 PyErr_SetString(PyExc_TypeError,
1009 "'in <string>' requires string as left operand");
1010 return -1;
1011 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001012 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001013 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001014 rhs = PyString_AS_STRING(el);
1015 lhs = PyString_AS_STRING(a);
1016
1017 /* optimize for a single character */
1018 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001019 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001020
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001021 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001022 while (lhs <= end) {
1023 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001024 return 1;
1025 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001026
Guido van Rossum9284a572000-03-07 15:53:43 +00001027 return 0;
1028}
1029
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001030static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001031string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001032{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001033 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001034 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001035 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001036 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001037 return NULL;
1038 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001039 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001040 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001041 if (v == NULL)
1042 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001043 else {
1044#ifdef COUNT_ALLOCS
1045 one_strings++;
1046#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001047 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001048 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001049 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050}
1051
Martin v. Löwiscd353062001-05-24 16:56:35 +00001052static PyObject*
1053string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001054{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001055 int c;
1056 int len_a, len_b;
1057 int min_len;
1058 PyObject *result;
1059
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001060 /* Make sure both arguments are strings. */
1061 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001062 result = Py_NotImplemented;
1063 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001064 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001065 if (a == b) {
1066 switch (op) {
1067 case Py_EQ:case Py_LE:case Py_GE:
1068 result = Py_True;
1069 goto out;
1070 case Py_NE:case Py_LT:case Py_GT:
1071 result = Py_False;
1072 goto out;
1073 }
1074 }
1075 if (op == Py_EQ) {
1076 /* Supporting Py_NE here as well does not save
1077 much time, since Py_NE is rarely used. */
1078 if (a->ob_size == b->ob_size
1079 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001080 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001081 a->ob_size) == 0)) {
1082 result = Py_True;
1083 } else {
1084 result = Py_False;
1085 }
1086 goto out;
1087 }
1088 len_a = a->ob_size; len_b = b->ob_size;
1089 min_len = (len_a < len_b) ? len_a : len_b;
1090 if (min_len > 0) {
1091 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1092 if (c==0)
1093 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1094 }else
1095 c = 0;
1096 if (c == 0)
1097 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1098 switch (op) {
1099 case Py_LT: c = c < 0; break;
1100 case Py_LE: c = c <= 0; break;
1101 case Py_EQ: assert(0); break; /* unreachable */
1102 case Py_NE: c = c != 0; break;
1103 case Py_GT: c = c > 0; break;
1104 case Py_GE: c = c >= 0; break;
1105 default:
1106 result = Py_NotImplemented;
1107 goto out;
1108 }
1109 result = c ? Py_True : Py_False;
1110 out:
1111 Py_INCREF(result);
1112 return result;
1113}
1114
1115int
1116_PyString_Eq(PyObject *o1, PyObject *o2)
1117{
1118 PyStringObject *a, *b;
1119 a = (PyStringObject*)o1;
1120 b = (PyStringObject*)o2;
1121 return a->ob_size == b->ob_size
1122 && *a->ob_sval == *b->ob_sval
1123 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001124}
1125
Guido van Rossum9bfef441993-03-29 10:43:31 +00001126static long
Fred Drakeba096332000-07-09 07:04:36 +00001127string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001128{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 register int len;
1130 register unsigned char *p;
1131 register long x;
1132
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001133 if (a->ob_shash != -1)
1134 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001135 len = a->ob_size;
1136 p = (unsigned char *) a->ob_sval;
1137 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001138 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001139 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001140 x ^= a->ob_size;
1141 if (x == -1)
1142 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001143 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001144 return x;
1145}
1146
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001147static PyObject*
1148string_subscript(PyStringObject* self, PyObject* item)
1149{
1150 if (PyInt_Check(item)) {
1151 long i = PyInt_AS_LONG(item);
1152 if (i < 0)
1153 i += PyString_GET_SIZE(self);
1154 return string_item(self,i);
1155 }
1156 else if (PyLong_Check(item)) {
1157 long i = PyLong_AsLong(item);
1158 if (i == -1 && PyErr_Occurred())
1159 return NULL;
1160 if (i < 0)
1161 i += PyString_GET_SIZE(self);
1162 return string_item(self,i);
1163 }
1164 else if (PySlice_Check(item)) {
1165 int start, stop, step, slicelength, cur, i;
1166 char* source_buf;
1167 char* result_buf;
1168 PyObject* result;
1169
1170 if (PySlice_GetIndicesEx((PySliceObject*)item,
1171 PyString_GET_SIZE(self),
1172 &start, &stop, &step, &slicelength) < 0) {
1173 return NULL;
1174 }
1175
1176 if (slicelength <= 0) {
1177 return PyString_FromStringAndSize("", 0);
1178 }
1179 else {
1180 source_buf = PyString_AsString((PyObject*)self);
1181 result_buf = PyMem_Malloc(slicelength);
1182
1183 for (cur = start, i = 0; i < slicelength;
1184 cur += step, i++) {
1185 result_buf[i] = source_buf[cur];
1186 }
1187
1188 result = PyString_FromStringAndSize(result_buf,
1189 slicelength);
1190 PyMem_Free(result_buf);
1191 return result;
1192 }
1193 }
1194 else {
1195 PyErr_SetString(PyExc_TypeError,
1196 "string indices must be integers");
1197 return NULL;
1198 }
1199}
1200
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001201static int
Fred Drakeba096332000-07-09 07:04:36 +00001202string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001203{
1204 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001205 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001206 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001207 return -1;
1208 }
1209 *ptr = (void *)self->ob_sval;
1210 return self->ob_size;
1211}
1212
1213static int
Fred Drakeba096332000-07-09 07:04:36 +00001214string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001215{
Guido van Rossum045e6881997-09-08 18:30:11 +00001216 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001217 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001218 return -1;
1219}
1220
1221static int
Fred Drakeba096332000-07-09 07:04:36 +00001222string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001223{
1224 if ( lenp )
1225 *lenp = self->ob_size;
1226 return 1;
1227}
1228
Guido van Rossum1db70701998-10-08 02:18:52 +00001229static int
Fred Drakeba096332000-07-09 07:04:36 +00001230string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001231{
1232 if ( index != 0 ) {
1233 PyErr_SetString(PyExc_SystemError,
1234 "accessing non-existent string segment");
1235 return -1;
1236 }
1237 *ptr = self->ob_sval;
1238 return self->ob_size;
1239}
1240
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001241static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001242 (inquiry)string_length, /*sq_length*/
1243 (binaryfunc)string_concat, /*sq_concat*/
1244 (intargfunc)string_repeat, /*sq_repeat*/
1245 (intargfunc)string_item, /*sq_item*/
1246 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001247 0, /*sq_ass_item*/
1248 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001249 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001250};
1251
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252static PyMappingMethods string_as_mapping = {
1253 (inquiry)string_length,
1254 (binaryfunc)string_subscript,
1255 0,
1256};
1257
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258static PyBufferProcs string_as_buffer = {
1259 (getreadbufferproc)string_buffer_getreadbuf,
1260 (getwritebufferproc)string_buffer_getwritebuf,
1261 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001262 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263};
1264
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265
1266
/* Selectors naming which end(s) of a string a strip operation works on.
   They double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Per-selector format strings, indexed by the selectors above.
   NOTE(review): these look like PyArg_ParseTuple formats; their use is
   outside this part of the file -- confirm against the callers. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for selector i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001275
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001276
1277static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001278split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001279{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001280 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 PyObject* item;
1282 PyObject *list = PyList_New(0);
1283
1284 if (list == NULL)
1285 return NULL;
1286
Guido van Rossum4c08d552000-03-10 22:55:18 +00001287 for (i = j = 0; i < len; ) {
1288 while (i < len && isspace(Py_CHARMASK(s[i])))
1289 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001291 while (i < len && !isspace(Py_CHARMASK(s[i])))
1292 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294 if (maxsplit-- <= 0)
1295 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1297 if (item == NULL)
1298 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 err = PyList_Append(list, item);
1300 Py_DECREF(item);
1301 if (err < 0)
1302 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001303 while (i < len && isspace(Py_CHARMASK(s[i])))
1304 i++;
1305 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 }
1307 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001308 if (j < len) {
1309 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1310 if (item == NULL)
1311 goto finally;
1312 err = PyList_Append(list, item);
1313 Py_DECREF(item);
1314 if (err < 0)
1315 goto finally;
1316 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317 return list;
1318 finally:
1319 Py_DECREF(list);
1320 return NULL;
1321}
1322
1323
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001324PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325"S.split([sep [,maxsplit]]) -> list of strings\n\
1326\n\
1327Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001328delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001329splits are done. If sep is not specified or is None, any\n\
1330whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331
1332static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001333string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334{
1335 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 int maxsplit = -1;
1337 const char *s = PyString_AS_STRING(self), *sub;
1338 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339
Guido van Rossum4c08d552000-03-10 22:55:18 +00001340 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001342 if (maxsplit < 0)
1343 maxsplit = INT_MAX;
1344 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001346 if (PyString_Check(subobj)) {
1347 sub = PyString_AS_STRING(subobj);
1348 n = PyString_GET_SIZE(subobj);
1349 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001350#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001351 else if (PyUnicode_Check(subobj))
1352 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001353#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001354 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1355 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356 if (n == 0) {
1357 PyErr_SetString(PyExc_ValueError, "empty separator");
1358 return NULL;
1359 }
1360
1361 list = PyList_New(0);
1362 if (list == NULL)
1363 return NULL;
1364
1365 i = j = 0;
1366 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001367 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001368 if (maxsplit-- <= 0)
1369 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1371 if (item == NULL)
1372 goto fail;
1373 err = PyList_Append(list, item);
1374 Py_DECREF(item);
1375 if (err < 0)
1376 goto fail;
1377 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378 }
1379 else
1380 i++;
1381 }
1382 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1383 if (item == NULL)
1384 goto fail;
1385 err = PyList_Append(list, item);
1386 Py_DECREF(item);
1387 if (err < 0)
1388 goto fail;
1389
1390 return list;
1391
1392 fail:
1393 Py_DECREF(list);
1394 return NULL;
1395}
1396
1397
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001398PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399"S.join(sequence) -> string\n\
1400\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001401Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001402sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403
1404static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001405string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406{
1407 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001408 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410 char *p;
1411 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001412 size_t sz = 0;
1413 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001414 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415
Tim Peters19fe14e2001-01-19 03:03:47 +00001416 seq = PySequence_Fast(orig, "");
1417 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001418 if (PyErr_ExceptionMatches(PyExc_TypeError))
1419 PyErr_Format(PyExc_TypeError,
1420 "sequence expected, %.80s found",
1421 orig->ob_type->tp_name);
1422 return NULL;
1423 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001424
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001425 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001426 if (seqlen == 0) {
1427 Py_DECREF(seq);
1428 return PyString_FromString("");
1429 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001430 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001431 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001432 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1433 PyErr_Format(PyExc_TypeError,
1434 "sequence item 0: expected string,"
1435 " %.80s found",
1436 item->ob_type->tp_name);
1437 Py_DECREF(seq);
1438 return NULL;
1439 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001440 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001441 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001442 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444
Tim Peters19fe14e2001-01-19 03:03:47 +00001445 /* There are at least two things to join. Do a pre-pass to figure out
1446 * the total amount of space we'll need (sz), see whether any argument
1447 * is absurd, and defer to the Unicode join if appropriate.
1448 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001449 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001450 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001451 item = PySequence_Fast_GET_ITEM(seq, i);
1452 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001453#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001454 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001455 /* Defer to Unicode join.
1456 * CAUTION: There's no gurantee that the
1457 * original sequence can be iterated over
1458 * again, so we must pass seq here.
1459 */
1460 PyObject *result;
1461 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001462 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001463 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001464 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001465#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001466 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001467 "sequence item %i: expected string,"
1468 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001469 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001470 Py_DECREF(seq);
1471 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001472 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001473 sz += PyString_GET_SIZE(item);
1474 if (i != 0)
1475 sz += seplen;
1476 if (sz < old_sz || sz > INT_MAX) {
1477 PyErr_SetString(PyExc_OverflowError,
1478 "join() is too long for a Python string");
1479 Py_DECREF(seq);
1480 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001482 }
1483
1484 /* Allocate result space. */
1485 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1486 if (res == NULL) {
1487 Py_DECREF(seq);
1488 return NULL;
1489 }
1490
1491 /* Catenate everything. */
1492 p = PyString_AS_STRING(res);
1493 for (i = 0; i < seqlen; ++i) {
1494 size_t n;
1495 item = PySequence_Fast_GET_ITEM(seq, i);
1496 n = PyString_GET_SIZE(item);
1497 memcpy(p, PyString_AS_STRING(item), n);
1498 p += n;
1499 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001500 memcpy(p, sep, seplen);
1501 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001502 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001504
Jeremy Hylton49048292000-07-11 03:28:17 +00001505 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507}
1508
Tim Peters52e155e2001-06-16 05:42:57 +00001509PyObject *
1510_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001511{
Tim Petersa7259592001-06-16 05:11:17 +00001512 assert(sep != NULL && PyString_Check(sep));
1513 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001514 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001515}
1516
/* Normalise a [start, end) pair in place against a sequence of length
   `len`: `end` is capped at `len`; a negative `start` or `end` is taken
   relative to the end of the sequence and then floored at 0.  A `start`
   greater than `len` is left unchanged. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int bound;

    /* Settle `end` first and store it before `start` is read. */
    bound = *end;
    if (bound > len)
        bound = len;
    else if (bound < 0)
        bound += len;
    if (bound < 0)
        bound = 0;
    *end = bound;

    bound = *start;
    if (bound < 0) {
        bound += len;
        if (bound < 0)
            bound = 0;
    }
    *start = bound;
}
1531
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532static long
Fred Drakeba096332000-07-09 07:04:36 +00001533string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536 int len = PyString_GET_SIZE(self);
1537 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001538 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001540 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001541 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001542 return -2;
1543 if (PyString_Check(subobj)) {
1544 sub = PyString_AS_STRING(subobj);
1545 n = PyString_GET_SIZE(subobj);
1546 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001547#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001548 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001549 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001550#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552 return -2;
1553
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001554 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555
Guido van Rossum4c08d552000-03-10 22:55:18 +00001556 if (dir > 0) {
1557 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001559 last -= n;
1560 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001561 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001562 return (long)i;
1563 }
1564 else {
1565 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001566
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 if (n == 0 && i <= last)
1568 return (long)last;
1569 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001570 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 return (long)j;
1572 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001573
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574 return -1;
1575}
1576
1577
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579"S.find(sub [,start [,end]]) -> int\n\
1580\n\
1581Return the lowest index in S where substring sub is found,\n\
1582such that sub is contained within s[start,end]. Optional\n\
1583arguments start and end are interpreted as in slice notation.\n\
1584\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001585Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586
1587static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001588string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001590 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591 if (result == -2)
1592 return NULL;
1593 return PyInt_FromLong(result);
1594}
1595
1596
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001597PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598"S.index(sub [,start [,end]]) -> int\n\
1599\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001600Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601
1602static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001603string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001605 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001606 if (result == -2)
1607 return NULL;
1608 if (result == -1) {
1609 PyErr_SetString(PyExc_ValueError,
1610 "substring not found in string.index");
1611 return NULL;
1612 }
1613 return PyInt_FromLong(result);
1614}
1615
1616
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001617PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618"S.rfind(sub [,start [,end]]) -> int\n\
1619\n\
1620Return the highest index in S where substring sub is found,\n\
1621such that sub is contained within s[start,end]. Optional\n\
1622arguments start and end are interpreted as in slice notation.\n\
1623\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001624Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625
1626static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001627string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001629 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 if (result == -2)
1631 return NULL;
1632 return PyInt_FromLong(result);
1633}
1634
1635
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001636PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637"S.rindex(sub [,start [,end]]) -> int\n\
1638\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001639Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640
1641static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001642string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001644 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645 if (result == -2)
1646 return NULL;
1647 if (result == -1) {
1648 PyErr_SetString(PyExc_ValueError,
1649 "substring not found in string.rindex");
1650 return NULL;
1651 }
1652 return PyInt_FromLong(result);
1653}
1654
1655
1656static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001657do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1658{
1659 char *s = PyString_AS_STRING(self);
1660 int len = PyString_GET_SIZE(self);
1661 char *sep = PyString_AS_STRING(sepobj);
1662 int seplen = PyString_GET_SIZE(sepobj);
1663 int i, j;
1664
1665 i = 0;
1666 if (striptype != RIGHTSTRIP) {
1667 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1668 i++;
1669 }
1670 }
1671
1672 j = len;
1673 if (striptype != LEFTSTRIP) {
1674 do {
1675 j--;
1676 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1677 j++;
1678 }
1679
1680 if (i == 0 && j == len && PyString_CheckExact(self)) {
1681 Py_INCREF(self);
1682 return (PyObject*)self;
1683 }
1684 else
1685 return PyString_FromStringAndSize(s+i, j-i);
1686}
1687
1688
1689static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001690do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691{
1692 char *s = PyString_AS_STRING(self);
1693 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 i = 0;
1696 if (striptype != RIGHTSTRIP) {
1697 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1698 i++;
1699 }
1700 }
1701
1702 j = len;
1703 if (striptype != LEFTSTRIP) {
1704 do {
1705 j--;
1706 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1707 j++;
1708 }
1709
Tim Peters8fa5dd02001-09-12 02:18:30 +00001710 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711 Py_INCREF(self);
1712 return (PyObject*)self;
1713 }
1714 else
1715 return PyString_FromStringAndSize(s+i, j-i);
1716}
1717
1718
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001719static PyObject *
1720do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1721{
1722 PyObject *sep = NULL;
1723
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001724 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001725 return NULL;
1726
1727 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001728 if (PyString_Check(sep))
1729 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001730#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001731 else if (PyUnicode_Check(sep)) {
1732 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1733 PyObject *res;
1734 if (uniself==NULL)
1735 return NULL;
1736 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1737 striptype, sep);
1738 Py_DECREF(uniself);
1739 return res;
1740 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001741#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001742 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001743 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001744#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001745 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001746#else
1747 "%s arg must be None or str",
1748#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001749 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001750 return NULL;
1751 }
1752 return do_xstrip(self, striptype, sep);
1753 }
1754
1755 return do_strip(self, striptype);
1756}
1757
1758
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001759PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001760"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761\n\
1762Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001763whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001764If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001765If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766
1767static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001768string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001770 if (PyTuple_GET_SIZE(args) == 0)
1771 return do_strip(self, BOTHSTRIP); /* Common case */
1772 else
1773 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774}
1775
1776
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001777PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001778"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001780Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001781If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001782If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783
1784static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001785string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001787 if (PyTuple_GET_SIZE(args) == 0)
1788 return do_strip(self, LEFTSTRIP); /* Common case */
1789 else
1790 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791}
1792
1793
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001794PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001795"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001797Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001798If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001799If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800
1801static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001802string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001804 if (PyTuple_GET_SIZE(args) == 0)
1805 return do_strip(self, RIGHTSTRIP); /* Common case */
1806 else
1807 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808}
1809
1810
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001811PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812"S.lower() -> string\n\
1813\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001814Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815
1816static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001817string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818{
1819 char *s = PyString_AS_STRING(self), *s_new;
1820 int i, n = PyString_GET_SIZE(self);
1821 PyObject *new;
1822
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823 new = PyString_FromStringAndSize(NULL, n);
1824 if (new == NULL)
1825 return NULL;
1826 s_new = PyString_AsString(new);
1827 for (i = 0; i < n; i++) {
1828 int c = Py_CHARMASK(*s++);
1829 if (isupper(c)) {
1830 *s_new = tolower(c);
1831 } else
1832 *s_new = c;
1833 s_new++;
1834 }
1835 return new;
1836}
1837
1838
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001839PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840"S.upper() -> string\n\
1841\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001842Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843
1844static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001845string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846{
1847 char *s = PyString_AS_STRING(self), *s_new;
1848 int i, n = PyString_GET_SIZE(self);
1849 PyObject *new;
1850
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 new = PyString_FromStringAndSize(NULL, n);
1852 if (new == NULL)
1853 return NULL;
1854 s_new = PyString_AsString(new);
1855 for (i = 0; i < n; i++) {
1856 int c = Py_CHARMASK(*s++);
1857 if (islower(c)) {
1858 *s_new = toupper(c);
1859 } else
1860 *s_new = c;
1861 s_new++;
1862 }
1863 return new;
1864}
1865
1866
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001867PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868"S.title() -> string\n\
1869\n\
1870Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001871characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001872
1873static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001874string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875{
1876 char *s = PyString_AS_STRING(self), *s_new;
1877 int i, n = PyString_GET_SIZE(self);
1878 int previous_is_cased = 0;
1879 PyObject *new;
1880
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881 new = PyString_FromStringAndSize(NULL, n);
1882 if (new == NULL)
1883 return NULL;
1884 s_new = PyString_AsString(new);
1885 for (i = 0; i < n; i++) {
1886 int c = Py_CHARMASK(*s++);
1887 if (islower(c)) {
1888 if (!previous_is_cased)
1889 c = toupper(c);
1890 previous_is_cased = 1;
1891 } else if (isupper(c)) {
1892 if (previous_is_cased)
1893 c = tolower(c);
1894 previous_is_cased = 1;
1895 } else
1896 previous_is_cased = 0;
1897 *s_new++ = c;
1898 }
1899 return new;
1900}
1901
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001902PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903"S.capitalize() -> string\n\
1904\n\
1905Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001906capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907
1908static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001909string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910{
1911 char *s = PyString_AS_STRING(self), *s_new;
1912 int i, n = PyString_GET_SIZE(self);
1913 PyObject *new;
1914
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915 new = PyString_FromStringAndSize(NULL, n);
1916 if (new == NULL)
1917 return NULL;
1918 s_new = PyString_AsString(new);
1919 if (0 < n) {
1920 int c = Py_CHARMASK(*s++);
1921 if (islower(c))
1922 *s_new = toupper(c);
1923 else
1924 *s_new = c;
1925 s_new++;
1926 }
1927 for (i = 1; i < n; i++) {
1928 int c = Py_CHARMASK(*s++);
1929 if (isupper(c))
1930 *s_new = tolower(c);
1931 else
1932 *s_new = c;
1933 s_new++;
1934 }
1935 return new;
1936}
1937
1938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940"S.count(sub[, start[, end]]) -> int\n\
1941\n\
1942Return the number of occurrences of substring sub in string\n\
1943S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001944interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945
1946static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001947string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950 int len = PyString_GET_SIZE(self), n;
1951 int i = 0, last = INT_MAX;
1952 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001953 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954
Guido van Rossumc6821402000-05-08 14:08:05 +00001955 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1956 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001958
Guido van Rossum4c08d552000-03-10 22:55:18 +00001959 if (PyString_Check(subobj)) {
1960 sub = PyString_AS_STRING(subobj);
1961 n = PyString_GET_SIZE(subobj);
1962 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001963#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001964 else if (PyUnicode_Check(subobj)) {
1965 int count;
1966 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1967 if (count == -1)
1968 return NULL;
1969 else
1970 return PyInt_FromLong((long) count);
1971 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001972#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001973 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1974 return NULL;
1975
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001976 string_adjust_indices(&i, &last, len);
1977
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978 m = last + 1 - n;
1979 if (n == 0)
1980 return PyInt_FromLong((long) (m-i));
1981
1982 r = 0;
1983 while (i < m) {
1984 if (!memcmp(s+i, sub, n)) {
1985 r++;
1986 i += n;
1987 } else {
1988 i++;
1989 }
1990 }
1991 return PyInt_FromLong((long) r);
1992}
1993
1994
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001995PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996"S.swapcase() -> string\n\
1997\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001998Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001999converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000
2001static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002002string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003{
2004 char *s = PyString_AS_STRING(self), *s_new;
2005 int i, n = PyString_GET_SIZE(self);
2006 PyObject *new;
2007
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 new = PyString_FromStringAndSize(NULL, n);
2009 if (new == NULL)
2010 return NULL;
2011 s_new = PyString_AsString(new);
2012 for (i = 0; i < n; i++) {
2013 int c = Py_CHARMASK(*s++);
2014 if (islower(c)) {
2015 *s_new = toupper(c);
2016 }
2017 else if (isupper(c)) {
2018 *s_new = tolower(c);
2019 }
2020 else
2021 *s_new = c;
2022 s_new++;
2023 }
2024 return new;
2025}
2026
2027
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002028PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029"S.translate(table [,deletechars]) -> string\n\
2030\n\
2031Return a copy of the string S, where all characters occurring\n\
2032in the optional argument deletechars are removed, and the\n\
2033remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002034translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035
2036static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002037string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039 register char *input, *output;
2040 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041 register int i, c, changed = 0;
2042 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 int inlen, tablen, dellen = 0;
2045 PyObject *result;
2046 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002047 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048
Guido van Rossum4c08d552000-03-10 22:55:18 +00002049 if (!PyArg_ParseTuple(args, "O|O:translate",
2050 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052
2053 if (PyString_Check(tableobj)) {
2054 table1 = PyString_AS_STRING(tableobj);
2055 tablen = PyString_GET_SIZE(tableobj);
2056 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002057#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002058 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002059 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 parameter; instead a mapping to None will cause characters
2061 to be deleted. */
2062 if (delobj != NULL) {
2063 PyErr_SetString(PyExc_TypeError,
2064 "deletions are implemented differently for unicode");
2065 return NULL;
2066 }
2067 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2068 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002069#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002070 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072
Martin v. Löwis00b61272002-12-12 20:03:19 +00002073 if (tablen != 256) {
2074 PyErr_SetString(PyExc_ValueError,
2075 "translation table must be 256 characters long");
2076 return NULL;
2077 }
2078
Guido van Rossum4c08d552000-03-10 22:55:18 +00002079 if (delobj != NULL) {
2080 if (PyString_Check(delobj)) {
2081 del_table = PyString_AS_STRING(delobj);
2082 dellen = PyString_GET_SIZE(delobj);
2083 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002084#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 else if (PyUnicode_Check(delobj)) {
2086 PyErr_SetString(PyExc_TypeError,
2087 "deletions are implemented differently for unicode");
2088 return NULL;
2089 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002090#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002091 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2092 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093 }
2094 else {
2095 del_table = NULL;
2096 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097 }
2098
2099 table = table1;
2100 inlen = PyString_Size(input_obj);
2101 result = PyString_FromStringAndSize((char *)NULL, inlen);
2102 if (result == NULL)
2103 return NULL;
2104 output_start = output = PyString_AsString(result);
2105 input = PyString_AsString(input_obj);
2106
2107 if (dellen == 0) {
2108 /* If no deletions are required, use faster code */
2109 for (i = inlen; --i >= 0; ) {
2110 c = Py_CHARMASK(*input++);
2111 if (Py_CHARMASK((*output++ = table[c])) != c)
2112 changed = 1;
2113 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002114 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115 return result;
2116 Py_DECREF(result);
2117 Py_INCREF(input_obj);
2118 return input_obj;
2119 }
2120
2121 for (i = 0; i < 256; i++)
2122 trans_table[i] = Py_CHARMASK(table[i]);
2123
2124 for (i = 0; i < dellen; i++)
2125 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2126
2127 for (i = inlen; --i >= 0; ) {
2128 c = Py_CHARMASK(*input++);
2129 if (trans_table[c] != -1)
2130 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2131 continue;
2132 changed = 1;
2133 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002134 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 Py_DECREF(result);
2136 Py_INCREF(input_obj);
2137 return input_obj;
2138 }
2139 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002140 if (inlen > 0)
2141 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142 return result;
2143}
2144
2145
2146/* What follows is used for implementing replace(). Perry Stoll. */
2147
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of that occurrence within MEM, or -1
  if there is none.  A pattern longer than MEM can never match, so -1 is
  returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot begin in the last pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
2173
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match.
  For example mem=1111 with pat=11 gives 2,
  and mem=11111 with pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	for (; len >= 0; hits++) {
		int consumed;
		int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* Step over the skipped prefix and the match itself. */
		consumed = at + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
2197
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a negative
   COUNT means "replace all".

   If length of PAT is greater than length of STR, or there are no
   occurrences of PAT in STR (or STR is empty, or PAT and SUB are both
   empty, or COUNT is 0), then the original string is returned.
   Otherwise, a new string is allocated here with PyMem_MALLOC and
   returned; the caller is responsible for releasing it.

   An empty PAT with a non-empty SUB inserts SUB before every byte of STR
   and once more at the end (subject to COUNT).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.  This includes the case where the
       length of the result would not fit in an int.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth per replacement.  Only a positive delta can overflow:
	   when delta <= 0 the result is never longer than the input. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;	/* result length would overflow an int */

	new_len = len + nfound*delta;
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* Empty pattern: emit SUB, then one input byte,
			   until the replacement budget is used up; then
			   copy whatever input is left. */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2295
2296
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002297PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298"S.replace (old, new[, maxsplit]) -> string\n\
2299\n\
2300Return a copy of string S with all occurrences of substring\n\
2301old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002302given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303
2304static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002305string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 const char *str = PyString_AS_STRING(self), *sub, *repl;
2308 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002309 const int len = PyString_GET_SIZE(self);
2310 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002313 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315 if (!PyArg_ParseTuple(args, "OO|i:replace",
2316 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318
2319 if (PyString_Check(subobj)) {
2320 sub = PyString_AS_STRING(subobj);
2321 sub_len = PyString_GET_SIZE(subobj);
2322 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002323#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002325 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002327#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2329 return NULL;
2330
2331 if (PyString_Check(replobj)) {
2332 repl = PyString_AS_STRING(replobj);
2333 repl_len = PyString_GET_SIZE(replobj);
2334 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002335#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002337 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002338 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002339#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002340 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2341 return NULL;
2342
Guido van Rossum4c08d552000-03-10 22:55:18 +00002343 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344 if (new_s == NULL) {
2345 PyErr_NoMemory();
2346 return NULL;
2347 }
2348 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002349 if (PyString_CheckExact(self)) {
2350 /* we're returning another reference to self */
2351 new = (PyObject*)self;
2352 Py_INCREF(new);
2353 }
2354 else {
2355 new = PyString_FromStringAndSize(str, len);
2356 if (new == NULL)
2357 return NULL;
2358 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359 }
2360 else {
2361 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002362 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363 }
2364 return new;
2365}
2366
2367
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002368PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002369"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002371Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002373comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374
2375static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002376string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381 int plen;
2382 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002383 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385
Guido van Rossumc6821402000-05-08 14:08:05 +00002386 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2387 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 return NULL;
2389 if (PyString_Check(subobj)) {
2390 prefix = PyString_AS_STRING(subobj);
2391 plen = PyString_GET_SIZE(subobj);
2392 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002393#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002394 else if (PyUnicode_Check(subobj)) {
2395 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002396 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002397 subobj, start, end, -1);
2398 if (rc == -1)
2399 return NULL;
2400 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002401 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002402 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002403#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002405 return NULL;
2406
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002407 string_adjust_indices(&start, &end, len);
2408
2409 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002410 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002412 if (end-start >= plen)
2413 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2414 else
2415 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416}
2417
2418
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002419PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002420"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002422Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002424comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425
2426static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002427string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002431 const char* suffix;
2432 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002434 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436
Guido van Rossumc6821402000-05-08 14:08:05 +00002437 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2438 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002439 return NULL;
2440 if (PyString_Check(subobj)) {
2441 suffix = PyString_AS_STRING(subobj);
2442 slen = PyString_GET_SIZE(subobj);
2443 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002444#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002445 else if (PyUnicode_Check(subobj)) {
2446 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002447 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002448 subobj, start, end, +1);
2449 if (rc == -1)
2450 return NULL;
2451 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002452 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002453 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002454#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002455 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002456 return NULL;
2457
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002458 string_adjust_indices(&start, &end, len);
2459
2460 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002461 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002463 if (end-slen > start)
2464 start = end - slen;
2465 if (end-start >= slen)
2466 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2467 else
2468 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469}
2470
2471
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002472PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002473"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002474\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002475Encodes S using the codec registered for encoding. encoding defaults\n\
2476to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002477handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002478a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2479'xmlcharrefreplace' as well as any other name registered with\n\
2480codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002481
2482static PyObject *
2483string_encode(PyStringObject *self, PyObject *args)
2484{
2485 char *encoding = NULL;
2486 char *errors = NULL;
2487 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2488 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002489 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2490}
2491
2492
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002493PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002494"S.decode([encoding[,errors]]) -> object\n\
2495\n\
2496Decodes S using the codec registered for encoding. encoding defaults\n\
2497to the default encoding. errors may be given to set a different error\n\
2498handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002499a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2500as well as any other name registerd with codecs.register_error that is\n\
2501able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002502
2503static PyObject *
2504string_decode(PyStringObject *self, PyObject *args)
2505{
2506 char *encoding = NULL;
2507 char *errors = NULL;
2508 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2509 return NULL;
2510 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002511}
2512
2513
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002514PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515"S.expandtabs([tabsize]) -> string\n\
2516\n\
2517Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002518If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002519
2520static PyObject*
2521string_expandtabs(PyStringObject *self, PyObject *args)
2522{
2523 const char *e, *p;
2524 char *q;
2525 int i, j;
2526 PyObject *u;
2527 int tabsize = 8;
2528
2529 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2530 return NULL;
2531
Thomas Wouters7e474022000-07-16 12:04:32 +00002532 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533 i = j = 0;
2534 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2535 for (p = PyString_AS_STRING(self); p < e; p++)
2536 if (*p == '\t') {
2537 if (tabsize > 0)
2538 j += tabsize - (j % tabsize);
2539 }
2540 else {
2541 j++;
2542 if (*p == '\n' || *p == '\r') {
2543 i += j;
2544 j = 0;
2545 }
2546 }
2547
2548 /* Second pass: create output string and fill it */
2549 u = PyString_FromStringAndSize(NULL, i + j);
2550 if (!u)
2551 return NULL;
2552
2553 j = 0;
2554 q = PyString_AS_STRING(u);
2555
2556 for (p = PyString_AS_STRING(self); p < e; p++)
2557 if (*p == '\t') {
2558 if (tabsize > 0) {
2559 i = tabsize - (j % tabsize);
2560 j += i;
2561 while (i--)
2562 *q++ = ' ';
2563 }
2564 }
2565 else {
2566 j++;
2567 *q++ = *p;
2568 if (*p == '\n' || *p == '\r')
2569 j = 0;
2570 }
2571
2572 return u;
2573}
2574
Tim Peters8fa5dd02001-09-12 02:18:30 +00002575static PyObject *
2576pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002577{
2578 PyObject *u;
2579
2580 if (left < 0)
2581 left = 0;
2582 if (right < 0)
2583 right = 0;
2584
Tim Peters8fa5dd02001-09-12 02:18:30 +00002585 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586 Py_INCREF(self);
2587 return (PyObject *)self;
2588 }
2589
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002590 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002591 left + PyString_GET_SIZE(self) + right);
2592 if (u) {
2593 if (left)
2594 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002595 memcpy(PyString_AS_STRING(u) + left,
2596 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002597 PyString_GET_SIZE(self));
2598 if (right)
2599 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2600 fill, right);
2601 }
2602
2603 return u;
2604}
2605
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002606PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002607"S.ljust(width) -> string\n"
2608"\n"
2609"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002610"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611
2612static PyObject *
2613string_ljust(PyStringObject *self, PyObject *args)
2614{
2615 int width;
2616 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2617 return NULL;
2618
Tim Peters8fa5dd02001-09-12 02:18:30 +00002619 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 Py_INCREF(self);
2621 return (PyObject*) self;
2622 }
2623
2624 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2625}
2626
2627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002628PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002629"S.rjust(width) -> string\n"
2630"\n"
2631"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002632"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633
2634static PyObject *
2635string_rjust(PyStringObject *self, PyObject *args)
2636{
2637 int width;
2638 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2639 return NULL;
2640
Tim Peters8fa5dd02001-09-12 02:18:30 +00002641 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642 Py_INCREF(self);
2643 return (PyObject*) self;
2644 }
2645
2646 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2647}
2648
2649
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002650PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002651"S.center(width) -> string\n"
2652"\n"
2653"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002654"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655
2656static PyObject *
2657string_center(PyStringObject *self, PyObject *args)
2658{
2659 int marg, left;
2660 int width;
2661
2662 if (!PyArg_ParseTuple(args, "i:center", &width))
2663 return NULL;
2664
Tim Peters8fa5dd02001-09-12 02:18:30 +00002665 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002666 Py_INCREF(self);
2667 return (PyObject*) self;
2668 }
2669
2670 marg = width - PyString_GET_SIZE(self);
2671 left = marg / 2 + (marg & width & 1);
2672
2673 return pad(self, left, marg - left, ' ');
2674}
2675
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002676PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002677"S.zfill(width) -> string\n"
2678"\n"
2679"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002680"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002681
2682static PyObject *
2683string_zfill(PyStringObject *self, PyObject *args)
2684{
2685 int fill;
2686 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002687 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002688
2689 int width;
2690 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2691 return NULL;
2692
2693 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002694 if (PyString_CheckExact(self)) {
2695 Py_INCREF(self);
2696 return (PyObject*) self;
2697 }
2698 else
2699 return PyString_FromStringAndSize(
2700 PyString_AS_STRING(self),
2701 PyString_GET_SIZE(self)
2702 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002703 }
2704
2705 fill = width - PyString_GET_SIZE(self);
2706
2707 s = pad(self, fill, 0, '0');
2708
2709 if (s == NULL)
2710 return NULL;
2711
2712 p = PyString_AS_STRING(s);
2713 if (p[fill] == '+' || p[fill] == '-') {
2714 /* move sign to beginning of string */
2715 p[0] = p[fill];
2716 p[fill] = '0';
2717 }
2718
2719 return (PyObject*) s;
2720}
2721
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002722PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002723"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002724"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002725"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002726"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002727
2728static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002729string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002730{
Fred Drakeba096332000-07-09 07:04:36 +00002731 register const unsigned char *p
2732 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002733 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002734
Guido van Rossum4c08d552000-03-10 22:55:18 +00002735 /* Shortcut for single character strings */
2736 if (PyString_GET_SIZE(self) == 1 &&
2737 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002738 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002739
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002740 /* Special case for empty strings */
2741 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002742 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002743
Guido van Rossum4c08d552000-03-10 22:55:18 +00002744 e = p + PyString_GET_SIZE(self);
2745 for (; p < e; p++) {
2746 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002747 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002748 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002749 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002750}
2751
2752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002753PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002754"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002755\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002756Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002757and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002758
2759static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002760string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002761{
Fred Drakeba096332000-07-09 07:04:36 +00002762 register const unsigned char *p
2763 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002764 register const unsigned char *e;
2765
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002766 /* Shortcut for single character strings */
2767 if (PyString_GET_SIZE(self) == 1 &&
2768 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002769 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002770
2771 /* Special case for empty strings */
2772 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002773 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002774
2775 e = p + PyString_GET_SIZE(self);
2776 for (; p < e; p++) {
2777 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002778 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002779 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002780 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002781}
2782
2783
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002784PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002785"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002786\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002787Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002788and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002789
2790static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002791string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002792{
Fred Drakeba096332000-07-09 07:04:36 +00002793 register const unsigned char *p
2794 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002795 register const unsigned char *e;
2796
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002797 /* Shortcut for single character strings */
2798 if (PyString_GET_SIZE(self) == 1 &&
2799 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002800 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002801
2802 /* Special case for empty strings */
2803 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002804 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002805
2806 e = p + PyString_GET_SIZE(self);
2807 for (; p < e; p++) {
2808 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002809 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002810 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002811 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002812}
2813
2814
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002815PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002816"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002817\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002818Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002819False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002820
2821static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002822string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002823{
Fred Drakeba096332000-07-09 07:04:36 +00002824 register const unsigned char *p
2825 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002826 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002827
Guido van Rossum4c08d552000-03-10 22:55:18 +00002828 /* Shortcut for single character strings */
2829 if (PyString_GET_SIZE(self) == 1 &&
2830 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002831 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002832
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002833 /* Special case for empty strings */
2834 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002835 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002836
Guido van Rossum4c08d552000-03-10 22:55:18 +00002837 e = p + PyString_GET_SIZE(self);
2838 for (; p < e; p++) {
2839 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002840 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002842 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002843}
2844
2845
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002846PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002847"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002848\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002849Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002850at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002851
2852static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002853string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002854{
Fred Drakeba096332000-07-09 07:04:36 +00002855 register const unsigned char *p
2856 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002857 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002858 int cased;
2859
Guido van Rossum4c08d552000-03-10 22:55:18 +00002860 /* Shortcut for single character strings */
2861 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002862 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002863
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002864 /* Special case for empty strings */
2865 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002866 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002867
Guido van Rossum4c08d552000-03-10 22:55:18 +00002868 e = p + PyString_GET_SIZE(self);
2869 cased = 0;
2870 for (; p < e; p++) {
2871 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002872 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002873 else if (!cased && islower(*p))
2874 cased = 1;
2875 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002876 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877}
2878
2879
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002880PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002881"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002883Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002884at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885
2886static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002887string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002888{
Fred Drakeba096332000-07-09 07:04:36 +00002889 register const unsigned char *p
2890 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002891 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002892 int cased;
2893
Guido van Rossum4c08d552000-03-10 22:55:18 +00002894 /* Shortcut for single character strings */
2895 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002896 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002897
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002898 /* Special case for empty strings */
2899 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002900 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002901
Guido van Rossum4c08d552000-03-10 22:55:18 +00002902 e = p + PyString_GET_SIZE(self);
2903 cased = 0;
2904 for (; p < e; p++) {
2905 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002906 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907 else if (!cased && isupper(*p))
2908 cased = 1;
2909 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002910 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002911}
2912
2913
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002914PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002915"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002917Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002918may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002919ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920
2921static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002922string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002923{
Fred Drakeba096332000-07-09 07:04:36 +00002924 register const unsigned char *p
2925 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002926 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002927 int cased, previous_is_cased;
2928
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929 /* Shortcut for single character strings */
2930 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002931 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002932
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002933 /* Special case for empty strings */
2934 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002935 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002936
Guido van Rossum4c08d552000-03-10 22:55:18 +00002937 e = p + PyString_GET_SIZE(self);
2938 cased = 0;
2939 previous_is_cased = 0;
2940 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002941 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942
2943 if (isupper(ch)) {
2944 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002945 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002946 previous_is_cased = 1;
2947 cased = 1;
2948 }
2949 else if (islower(ch)) {
2950 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002951 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002952 previous_is_cased = 1;
2953 cased = 1;
2954 }
2955 else
2956 previous_is_cased = 0;
2957 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002958 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002959}
2960
2961
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002962PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002963"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002964\n\
2965Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002966Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002967is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002968
2969#define SPLIT_APPEND(data, left, right) \
2970 str = PyString_FromStringAndSize(data + left, right - left); \
2971 if (!str) \
2972 goto onError; \
2973 if (PyList_Append(list, str)) { \
2974 Py_DECREF(str); \
2975 goto onError; \
2976 } \
2977 else \
2978 Py_DECREF(str);
2979
2980static PyObject*
2981string_splitlines(PyStringObject *self, PyObject *args)
2982{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002983 register int i;
2984 register int j;
2985 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002986 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987 PyObject *list;
2988 PyObject *str;
2989 char *data;
2990
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002991 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002992 return NULL;
2993
2994 data = PyString_AS_STRING(self);
2995 len = PyString_GET_SIZE(self);
2996
Guido van Rossum4c08d552000-03-10 22:55:18 +00002997 list = PyList_New(0);
2998 if (!list)
2999 goto onError;
3000
3001 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003002 int eol;
3003
Guido van Rossum4c08d552000-03-10 22:55:18 +00003004 /* Find a line and append it */
3005 while (i < len && data[i] != '\n' && data[i] != '\r')
3006 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003007
3008 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003009 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003010 if (i < len) {
3011 if (data[i] == '\r' && i + 1 < len &&
3012 data[i+1] == '\n')
3013 i += 2;
3014 else
3015 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003016 if (keepends)
3017 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003018 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003019 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003020 j = i;
3021 }
3022 if (j < len) {
3023 SPLIT_APPEND(data, j, len);
3024 }
3025
3026 return list;
3027
3028 onError:
3029 Py_DECREF(list);
3030 return NULL;
3031}
3032
3033#undef SPLIT_APPEND
3034
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003035
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003036static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003037string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003038 /* Counterparts of the obsolete stropmodule functions; except
3039 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003040 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3041 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3042 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3043 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003044 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3045 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3046 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3047 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3048 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3049 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3050 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003051 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3052 capitalize__doc__},
3053 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3054 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3055 endswith__doc__},
3056 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3057 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3058 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3059 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3060 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3061 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3062 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3063 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3064 startswith__doc__},
3065 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3066 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3067 swapcase__doc__},
3068 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3069 translate__doc__},
3070 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3071 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3072 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3073 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3074 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3075 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3076 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3077 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3078 expandtabs__doc__},
3079 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3080 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003081 {NULL, NULL} /* sentinel */
3082};
3083
Jeremy Hylton938ace62002-07-17 16:30:39 +00003084static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003085str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3086
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003088string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003089{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003090 PyObject *x = NULL;
3091 static char *kwlist[] = {"object", 0};
3092
Guido van Rossumae960af2001-08-30 03:11:59 +00003093 if (type != &PyString_Type)
3094 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003095 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3096 return NULL;
3097 if (x == NULL)
3098 return PyString_FromString("");
3099 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003100}
3101
Guido van Rossumae960af2001-08-30 03:11:59 +00003102static PyObject *
3103str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3104{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003105 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003106 int n;
3107
3108 assert(PyType_IsSubtype(type, &PyString_Type));
3109 tmp = string_new(&PyString_Type, args, kwds);
3110 if (tmp == NULL)
3111 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003112 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003113 n = PyString_GET_SIZE(tmp);
3114 pnew = type->tp_alloc(type, n);
3115 if (pnew != NULL) {
3116 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003117 ((PyStringObject *)pnew)->ob_shash =
3118 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003119 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003120 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003121 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003122 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003123}
3124
Guido van Rossumcacfc072002-05-24 19:01:59 +00003125static PyObject *
3126basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3127{
3128 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003129 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003130 return NULL;
3131}
3132
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003133static PyObject *
3134string_mod(PyObject *v, PyObject *w)
3135{
3136 if (!PyString_Check(v)) {
3137 Py_INCREF(Py_NotImplemented);
3138 return Py_NotImplemented;
3139 }
3140 return PyString_Format(v, w);
3141}
3142
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003143PyDoc_STRVAR(basestring_doc,
3144"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003145
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003146static PyNumberMethods string_as_number = {
3147 0, /*nb_add*/
3148 0, /*nb_subtract*/
3149 0, /*nb_multiply*/
3150 0, /*nb_divide*/
3151 string_mod, /*nb_remainder*/
3152};
3153
3154
Guido van Rossumcacfc072002-05-24 19:01:59 +00003155PyTypeObject PyBaseString_Type = {
3156 PyObject_HEAD_INIT(&PyType_Type)
3157 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003158 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003159 0,
3160 0,
3161 0, /* tp_dealloc */
3162 0, /* tp_print */
3163 0, /* tp_getattr */
3164 0, /* tp_setattr */
3165 0, /* tp_compare */
3166 0, /* tp_repr */
3167 0, /* tp_as_number */
3168 0, /* tp_as_sequence */
3169 0, /* tp_as_mapping */
3170 0, /* tp_hash */
3171 0, /* tp_call */
3172 0, /* tp_str */
3173 0, /* tp_getattro */
3174 0, /* tp_setattro */
3175 0, /* tp_as_buffer */
3176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3177 basestring_doc, /* tp_doc */
3178 0, /* tp_traverse */
3179 0, /* tp_clear */
3180 0, /* tp_richcompare */
3181 0, /* tp_weaklistoffset */
3182 0, /* tp_iter */
3183 0, /* tp_iternext */
3184 0, /* tp_methods */
3185 0, /* tp_members */
3186 0, /* tp_getset */
3187 &PyBaseObject_Type, /* tp_base */
3188 0, /* tp_dict */
3189 0, /* tp_descr_get */
3190 0, /* tp_descr_set */
3191 0, /* tp_dictoffset */
3192 0, /* tp_init */
3193 0, /* tp_alloc */
3194 basestring_new, /* tp_new */
3195 0, /* tp_free */
3196};
3197
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003198PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003199"str(object) -> string\n\
3200\n\
3201Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003202If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003203
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003204PyTypeObject PyString_Type = {
3205 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003206 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003207 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003208 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003209 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003210 (destructor)string_dealloc, /* tp_dealloc */
3211 (printfunc)string_print, /* tp_print */
3212 0, /* tp_getattr */
3213 0, /* tp_setattr */
3214 0, /* tp_compare */
3215 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003216 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003217 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003218 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003219 (hashfunc)string_hash, /* tp_hash */
3220 0, /* tp_call */
3221 (reprfunc)string_str, /* tp_str */
3222 PyObject_GenericGetAttr, /* tp_getattro */
3223 0, /* tp_setattro */
3224 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003225 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3226 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003227 string_doc, /* tp_doc */
3228 0, /* tp_traverse */
3229 0, /* tp_clear */
3230 (richcmpfunc)string_richcompare, /* tp_richcompare */
3231 0, /* tp_weaklistoffset */
3232 0, /* tp_iter */
3233 0, /* tp_iternext */
3234 string_methods, /* tp_methods */
3235 0, /* tp_members */
3236 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003237 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003238 0, /* tp_dict */
3239 0, /* tp_descr_get */
3240 0, /* tp_descr_set */
3241 0, /* tp_dictoffset */
3242 0, /* tp_init */
3243 0, /* tp_alloc */
3244 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003245 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003246};
3247
3248void
Fred Drakeba096332000-07-09 07:04:36 +00003249PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003250{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003251 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003252 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003253 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003254 if (w == NULL || !PyString_Check(*pv)) {
3255 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003256 *pv = NULL;
3257 return;
3258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003259 v = string_concat((PyStringObject *) *pv, w);
3260 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003261 *pv = v;
3262}
3263
Guido van Rossum013142a1994-08-30 08:19:36 +00003264void
Fred Drakeba096332000-07-09 07:04:36 +00003265PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003266{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003267 PyString_Concat(pv, w);
3268 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003269}
3270
3271
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003272/* The following function breaks the notion that strings are immutable:
3273 it changes the size of a string. We get away with this only if there
3274 is only one module referencing the object. You can also think of it
3275 as creating a new string object and destroying the old one, only
3276 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003277 already be known to some other part of the code...
3278 Note that if there's not enough memory to resize the string, the original
3279 string object at *pv is deallocated, *pv is set to NULL, an "out of
3280 memory" exception is set, and -1 is returned. Else (on success) 0 is
3281 returned, and the value in *pv may or may not be the same as on input.
3282 As always, an extra byte is allocated for a trailing \0 byte (newsize
3283 does *not* include that), and a trailing \0 byte is stored.
3284*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003285
3286int
Fred Drakeba096332000-07-09 07:04:36 +00003287_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003288{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003289 register PyObject *v;
3290 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003291 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003292 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003293 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003294 Py_DECREF(v);
3295 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003296 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003297 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003298 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003299 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003300 _Py_ForgetReference(v);
3301 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003302 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003304 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003305 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003306 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003307 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003308 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003309 _Py_NewReference(*pv);
3310 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003311 sv->ob_size = newsize;
3312 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003313 return 0;
3314}
Guido van Rossume5372401993-03-16 12:15:04 +00003315
3316/* Helpers for formatstring */
3317
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003318static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003319getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003320{
3321 int argidx = *p_argidx;
3322 if (argidx < arglen) {
3323 (*p_argidx)++;
3324 if (arglen < 0)
3325 return args;
3326 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003327 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003328 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003329 PyErr_SetString(PyExc_TypeError,
3330 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003331 return NULL;
3332}
3333
/* Format flag bits.  Each bit records that the corresponding flag
 * character was seen in a conversion specifier.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3346
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003347static int
Fred Drakeba096332000-07-09 07:04:36 +00003348formatfloat(char *buf, size_t buflen, int flags,
3349 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003350{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003351 /* fmt = '%#.' + `prec` + `type`
3352 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003353 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003354 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003355 x = PyFloat_AsDouble(v);
3356 if (x == -1.0 && PyErr_Occurred()) {
3357 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003358 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003359 }
Guido van Rossume5372401993-03-16 12:15:04 +00003360 if (prec < 0)
3361 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003362 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3363 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003364 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3365 (flags&F_ALT) ? "#" : "",
3366 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003367 /* worst case length calc to ensure no buffer overrun:
3368 fmt = %#.<prec>g
3369 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003370 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003371 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3372 If prec=0 the effective precision is 1 (the leading digit is
3373 always given), therefore increase by one to 10+prec. */
3374 if (buflen <= (size_t)10 + (size_t)prec) {
3375 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003376 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003377 return -1;
3378 }
Tim Peters885d4572001-11-28 20:27:42 +00003379 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003380 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003381}
3382
Tim Peters38fd5b62000-09-21 05:43:11 +00003383/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3384 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3385 * Python's regular ints.
3386 * Return value: a new PyString*, or NULL if error.
3387 * . *pbuf is set to point into it,
3388 * *plen set to the # of chars following that.
3389 * Caller must decref it when done using pbuf.
3390 * The string starting at *pbuf is of the form
3391 * "-"? ("0x" | "0X")? digit+
3392 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003393 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003394 * There will be at least prec digits, zero-filled on the left if
3395 * necessary to get that many.
3396 * val object to be converted
3397 * flags bitmask of format flags; only F_ALT is looked at
3398 * prec minimum number of digits; 0-fill on left if needed
3399 * type a character in [duoxX]; u acts the same as d
3400 *
3401 * CAUTION: o, x and X conversions on regular ints can never
3402 * produce a '-' sign, but can for Python's unbounded ints.
3403 */
3404PyObject*
3405_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3406 char **pbuf, int *plen)
3407{
3408 PyObject *result = NULL;
3409 char *buf;
3410 int i;
3411 int sign; /* 1 if '-', else 0 */
3412 int len; /* number of characters */
3413 int numdigits; /* len == numnondigits + numdigits */
3414 int numnondigits = 0;
3415
3416 switch (type) {
3417 case 'd':
3418 case 'u':
3419 result = val->ob_type->tp_str(val);
3420 break;
3421 case 'o':
3422 result = val->ob_type->tp_as_number->nb_oct(val);
3423 break;
3424 case 'x':
3425 case 'X':
3426 numnondigits = 2;
3427 result = val->ob_type->tp_as_number->nb_hex(val);
3428 break;
3429 default:
3430 assert(!"'type' not in [duoxX]");
3431 }
3432 if (!result)
3433 return NULL;
3434
3435 /* To modify the string in-place, there can only be one reference. */
3436 if (result->ob_refcnt != 1) {
3437 PyErr_BadInternalCall();
3438 return NULL;
3439 }
3440 buf = PyString_AsString(result);
3441 len = PyString_Size(result);
3442 if (buf[len-1] == 'L') {
3443 --len;
3444 buf[len] = '\0';
3445 }
3446 sign = buf[0] == '-';
3447 numnondigits += sign;
3448 numdigits = len - numnondigits;
3449 assert(numdigits > 0);
3450
Tim Petersfff53252001-04-12 18:38:48 +00003451 /* Get rid of base marker unless F_ALT */
3452 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003453 /* Need to skip 0x, 0X or 0. */
3454 int skipped = 0;
3455 switch (type) {
3456 case 'o':
3457 assert(buf[sign] == '0');
3458 /* If 0 is only digit, leave it alone. */
3459 if (numdigits > 1) {
3460 skipped = 1;
3461 --numdigits;
3462 }
3463 break;
3464 case 'x':
3465 case 'X':
3466 assert(buf[sign] == '0');
3467 assert(buf[sign + 1] == 'x');
3468 skipped = 2;
3469 numnondigits -= 2;
3470 break;
3471 }
3472 if (skipped) {
3473 buf += skipped;
3474 len -= skipped;
3475 if (sign)
3476 buf[0] = '-';
3477 }
3478 assert(len == numnondigits + numdigits);
3479 assert(numdigits > 0);
3480 }
3481
3482 /* Fill with leading zeroes to meet minimum width. */
3483 if (prec > numdigits) {
3484 PyObject *r1 = PyString_FromStringAndSize(NULL,
3485 numnondigits + prec);
3486 char *b1;
3487 if (!r1) {
3488 Py_DECREF(result);
3489 return NULL;
3490 }
3491 b1 = PyString_AS_STRING(r1);
3492 for (i = 0; i < numnondigits; ++i)
3493 *b1++ = *buf++;
3494 for (i = 0; i < prec - numdigits; i++)
3495 *b1++ = '0';
3496 for (i = 0; i < numdigits; i++)
3497 *b1++ = *buf++;
3498 *b1 = '\0';
3499 Py_DECREF(result);
3500 result = r1;
3501 buf = PyString_AS_STRING(result);
3502 len = numnondigits + prec;
3503 }
3504
3505 /* Fix up case for hex conversions. */
3506 switch (type) {
3507 case 'x':
3508 /* Need to convert all upper case letters to lower case. */
3509 for (i = 0; i < len; i++)
3510 if (buf[i] >= 'A' && buf[i] <= 'F')
3511 buf[i] += 'a'-'A';
3512 break;
3513 case 'X':
3514 /* Need to convert 0x to 0X (and -0x to -0X). */
3515 if (buf[sign + 1] == 'x')
3516 buf[sign + 1] = 'X';
3517 break;
3518 }
3519 *pbuf = buf;
3520 *plen = len;
3521 return result;
3522}
3523
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003524static int
Fred Drakeba096332000-07-09 07:04:36 +00003525formatint(char *buf, size_t buflen, int flags,
3526 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003527{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003528 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003529 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3530 + 1 + 1 = 24 */
3531 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003532 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003533
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003534 x = PyInt_AsLong(v);
3535 if (x == -1 && PyErr_Occurred()) {
3536 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003537 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003538 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003539 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003540 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003541 "%u/%o/%x/%X of negative int will return "
3542 "a signed string in Python 2.4 and up") < 0)
3543 return -1;
3544 }
Guido van Rossume5372401993-03-16 12:15:04 +00003545 if (prec < 0)
3546 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003547
3548 if ((flags & F_ALT) &&
3549 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003550 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003551 * of issues that cause pain:
3552 * - when 0 is being converted, the C standard leaves off
3553 * the '0x' or '0X', which is inconsistent with other
3554 * %#x/%#X conversions and inconsistent with Python's
3555 * hex() function
3556 * - there are platforms that violate the standard and
3557 * convert 0 with the '0x' or '0X'
3558 * (Metrowerks, Compaq Tru64)
3559 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003560 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003561 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003562 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003563 * We can achieve the desired consistency by inserting our
3564 * own '0x' or '0X' prefix, and substituting %x/%X in place
3565 * of %#x/%#X.
3566 *
3567 * Note that this is the same approach as used in
3568 * formatint() in unicodeobject.c
3569 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003570 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003571 type, prec, type);
3572 }
3573 else {
3574 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003575 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003576 prec, type);
3577 }
3578
Tim Peters38fd5b62000-09-21 05:43:11 +00003579 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003580 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3581 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003582 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003583 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003584 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003585 return -1;
3586 }
Tim Peters885d4572001-11-28 20:27:42 +00003587 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003588 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003589}
3590
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003591static int
Fred Drakeba096332000-07-09 07:04:36 +00003592formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003593{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003594 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003595 if (PyString_Check(v)) {
3596 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003597 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003598 }
3599 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003600 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003601 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003602 }
3603 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003604 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003605}
3606
Guido van Rossum013142a1994-08-30 08:19:36 +00003607
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine performs its own bounds check so the buffer cannot overflow;
   allocating a buffer of the appropriate size per conversion might be a
   better solution, but the fixed size is sufficient for now.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003617
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003618PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003619PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003620{
3621 char *fmt, *res;
3622 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003623 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003624 PyObject *result, *orig_args;
3625#ifdef Py_USING_UNICODE
3626 PyObject *v, *w;
3627#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003628 PyObject *dict = NULL;
3629 if (format == NULL || !PyString_Check(format) || args == NULL) {
3630 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003631 return NULL;
3632 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003633 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003634 fmt = PyString_AS_STRING(format);
3635 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003636 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003637 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003638 if (result == NULL)
3639 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003640 res = PyString_AsString(result);
3641 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003642 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003643 argidx = 0;
3644 }
3645 else {
3646 arglen = -1;
3647 argidx = -2;
3648 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003649 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3650 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003651 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003652 while (--fmtcnt >= 0) {
3653 if (*fmt != '%') {
3654 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003655 rescnt = fmtcnt + 100;
3656 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003657 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003658 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003659 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003660 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003661 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003662 }
3663 *res++ = *fmt++;
3664 }
3665 else {
3666 /* Got a format specifier */
3667 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003668 int width = -1;
3669 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003670 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003671 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003672 PyObject *v = NULL;
3673 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003674 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003675 int sign;
3676 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003677 char formatbuf[FORMATBUFLEN];
3678 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003679#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003680 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003681 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003682#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003683
Guido van Rossumda9c2711996-12-05 21:58:58 +00003684 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003685 if (*fmt == '(') {
3686 char *keystart;
3687 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003688 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003689 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003690
3691 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003692 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003693 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003694 goto error;
3695 }
3696 ++fmt;
3697 --fmtcnt;
3698 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003699 /* Skip over balanced parentheses */
3700 while (pcount > 0 && --fmtcnt >= 0) {
3701 if (*fmt == ')')
3702 --pcount;
3703 else if (*fmt == '(')
3704 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003705 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003706 }
3707 keylen = fmt - keystart - 1;
3708 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003709 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003710 "incomplete format key");
3711 goto error;
3712 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003713 key = PyString_FromStringAndSize(keystart,
3714 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003715 if (key == NULL)
3716 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003717 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003718 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003719 args_owned = 0;
3720 }
3721 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003722 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003723 if (args == NULL) {
3724 goto error;
3725 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003726 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003727 arglen = -1;
3728 argidx = -2;
3729 }
Guido van Rossume5372401993-03-16 12:15:04 +00003730 while (--fmtcnt >= 0) {
3731 switch (c = *fmt++) {
3732 case '-': flags |= F_LJUST; continue;
3733 case '+': flags |= F_SIGN; continue;
3734 case ' ': flags |= F_BLANK; continue;
3735 case '#': flags |= F_ALT; continue;
3736 case '0': flags |= F_ZERO; continue;
3737 }
3738 break;
3739 }
3740 if (c == '*') {
3741 v = getnextarg(args, arglen, &argidx);
3742 if (v == NULL)
3743 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003744 if (!PyInt_Check(v)) {
3745 PyErr_SetString(PyExc_TypeError,
3746 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003747 goto error;
3748 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003749 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003750 if (width < 0) {
3751 flags |= F_LJUST;
3752 width = -width;
3753 }
Guido van Rossume5372401993-03-16 12:15:04 +00003754 if (--fmtcnt >= 0)
3755 c = *fmt++;
3756 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003757 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003758 width = c - '0';
3759 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003760 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003761 if (!isdigit(c))
3762 break;
3763 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003764 PyErr_SetString(
3765 PyExc_ValueError,
3766 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003767 goto error;
3768 }
3769 width = width*10 + (c - '0');
3770 }
3771 }
3772 if (c == '.') {
3773 prec = 0;
3774 if (--fmtcnt >= 0)
3775 c = *fmt++;
3776 if (c == '*') {
3777 v = getnextarg(args, arglen, &argidx);
3778 if (v == NULL)
3779 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003780 if (!PyInt_Check(v)) {
3781 PyErr_SetString(
3782 PyExc_TypeError,
3783 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003784 goto error;
3785 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003786 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003787 if (prec < 0)
3788 prec = 0;
3789 if (--fmtcnt >= 0)
3790 c = *fmt++;
3791 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003792 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003793 prec = c - '0';
3794 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003795 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003796 if (!isdigit(c))
3797 break;
3798 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003799 PyErr_SetString(
3800 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003801 "prec too big");
3802 goto error;
3803 }
3804 prec = prec*10 + (c - '0');
3805 }
3806 }
3807 } /* prec */
3808 if (fmtcnt >= 0) {
3809 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003810 if (--fmtcnt >= 0)
3811 c = *fmt++;
3812 }
3813 }
3814 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003815 PyErr_SetString(PyExc_ValueError,
3816 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003817 goto error;
3818 }
3819 if (c != '%') {
3820 v = getnextarg(args, arglen, &argidx);
3821 if (v == NULL)
3822 goto error;
3823 }
3824 sign = 0;
3825 fill = ' ';
3826 switch (c) {
3827 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003828 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003829 len = 1;
3830 break;
3831 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003832#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003833 if (PyUnicode_Check(v)) {
3834 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003835 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003836 goto unicode;
3837 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003838#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003839 /* Fall through */
3840 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003841 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003842 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003843 else
3844 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003845 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003846 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003847 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00003848 /* XXX Note: this should never happen,
3849 since PyObject_Repr() and
3850 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003851 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00003852 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003853 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003854 goto error;
3855 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003856 pbuf = PyString_AS_STRING(temp);
3857 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003858 if (prec >= 0 && len > prec)
3859 len = prec;
3860 break;
3861 case 'i':
3862 case 'd':
3863 case 'u':
3864 case 'o':
3865 case 'x':
3866 case 'X':
3867 if (c == 'i')
3868 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003869 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003870 temp = _PyString_FormatLong(v, flags,
3871 prec, c, &pbuf, &len);
3872 if (!temp)
3873 goto error;
3874 /* unbounded ints can always produce
3875 a sign character! */
3876 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003877 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003878 else {
3879 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003880 len = formatint(pbuf,
3881 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003882 flags, prec, c, v);
3883 if (len < 0)
3884 goto error;
3885 /* only d conversion is signed */
3886 sign = c == 'd';
3887 }
3888 if (flags & F_ZERO)
3889 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003890 break;
3891 case 'e':
3892 case 'E':
3893 case 'f':
3894 case 'g':
3895 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003896 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003897 len = formatfloat(pbuf, sizeof(formatbuf),
3898 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003899 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003900 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003901 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003902 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003903 fill = '0';
3904 break;
3905 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003906 pbuf = formatbuf;
3907 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003908 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003909 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003910 break;
3911 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003912 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003913 "unsupported format character '%c' (0x%x) "
3914 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003915 c, c,
3916 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003917 goto error;
3918 }
3919 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003920 if (*pbuf == '-' || *pbuf == '+') {
3921 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003922 len--;
3923 }
3924 else if (flags & F_SIGN)
3925 sign = '+';
3926 else if (flags & F_BLANK)
3927 sign = ' ';
3928 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003929 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003930 }
3931 if (width < len)
3932 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003933 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003934 reslen -= rescnt;
3935 rescnt = width + fmtcnt + 100;
3936 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003937 if (reslen < 0) {
3938 Py_DECREF(result);
3939 return PyErr_NoMemory();
3940 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003941 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003942 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003943 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003944 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003945 }
3946 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003947 if (fill != ' ')
3948 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003949 rescnt--;
3950 if (width > len)
3951 width--;
3952 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003953 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3954 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003955 assert(pbuf[1] == c);
3956 if (fill != ' ') {
3957 *res++ = *pbuf++;
3958 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003959 }
Tim Petersfff53252001-04-12 18:38:48 +00003960 rescnt -= 2;
3961 width -= 2;
3962 if (width < 0)
3963 width = 0;
3964 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003965 }
3966 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003967 do {
3968 --rescnt;
3969 *res++ = fill;
3970 } while (--width > len);
3971 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003972 if (fill == ' ') {
3973 if (sign)
3974 *res++ = sign;
3975 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003976 (c == 'x' || c == 'X')) {
3977 assert(pbuf[0] == '0');
3978 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003979 *res++ = *pbuf++;
3980 *res++ = *pbuf++;
3981 }
3982 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003983 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003984 res += len;
3985 rescnt -= len;
3986 while (--width >= len) {
3987 --rescnt;
3988 *res++ = ' ';
3989 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003990 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003992 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003993 goto error;
3994 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003995 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003996 } /* '%' */
3997 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003998 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003999 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004000 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004001 goto error;
4002 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004003 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004004 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004005 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004006 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004007 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004008
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004009#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004010 unicode:
4011 if (args_owned) {
4012 Py_DECREF(args);
4013 args_owned = 0;
4014 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004015 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004016 if (PyTuple_Check(orig_args) && argidx > 0) {
4017 PyObject *v;
4018 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4019 v = PyTuple_New(n);
4020 if (v == NULL)
4021 goto error;
4022 while (--n >= 0) {
4023 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4024 Py_INCREF(w);
4025 PyTuple_SET_ITEM(v, n, w);
4026 }
4027 args = v;
4028 } else {
4029 Py_INCREF(orig_args);
4030 args = orig_args;
4031 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004032 args_owned = 1;
4033 /* Take what we have of the result and let the Unicode formatting
4034 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004035 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004036 if (_PyString_Resize(&result, rescnt))
4037 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004038 fmtcnt = PyString_GET_SIZE(format) - \
4039 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004040 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4041 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004042 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004043 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004044 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004045 if (v == NULL)
4046 goto error;
4047 /* Paste what we have (result) to what the Unicode formatting
4048 function returned (v) and return the result (or error) */
4049 w = PyUnicode_Concat(result, v);
4050 Py_DECREF(result);
4051 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004052 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004053 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004054#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004055
Guido van Rossume5372401993-03-16 12:15:04 +00004056 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004057 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004058 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004059 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004060 }
Guido van Rossume5372401993-03-16 12:15:04 +00004061 return NULL;
4062}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004063
Guido van Rossum2a61e741997-01-18 07:55:05 +00004064void
Fred Drakeba096332000-07-09 07:04:36 +00004065PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004066{
4067 register PyStringObject *s = (PyStringObject *)(*p);
4068 PyObject *t;
4069 if (s == NULL || !PyString_Check(s))
4070 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004071 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004072 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004073 if (interned == NULL) {
4074 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004075 if (interned == NULL) {
4076 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004077 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004078 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004079 }
4080 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4081 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004082 Py_DECREF(*p);
4083 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004084 return;
4085 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004086 /* Ensure that only true string objects appear in the intern dict */
4087 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004088 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4089 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004090 if (t == NULL) {
4091 PyErr_Clear();
4092 return;
Tim Peters111f6092001-09-12 07:54:51 +00004093 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004094 } else {
4095 t = (PyObject*) s;
4096 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004097 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004098
4099 if (PyDict_SetItem(interned, t, t) == 0) {
4100 /* The two references in interned are not counted by
4101 refcnt. The string deallocator will take care of this */
4102 ((PyObject *)t)->ob_refcnt-=2;
4103 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4104 Py_DECREF(*p);
4105 *p = t;
4106 return;
4107 }
4108 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004109 PyErr_Clear();
4110}
4111
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004112void
4113PyString_InternImmortal(PyObject **p)
4114{
4115 PyString_InternInPlace(p);
4116 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4117 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4118 Py_INCREF(*p);
4119 }
4120}
4121
Guido van Rossum2a61e741997-01-18 07:55:05 +00004122
4123PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004124PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004125{
4126 PyObject *s = PyString_FromString(cp);
4127 if (s == NULL)
4128 return NULL;
4129 PyString_InternInPlace(&s);
4130 return s;
4131}
4132
Guido van Rossum8cf04761997-08-02 02:57:45 +00004133void
Fred Drakeba096332000-07-09 07:04:36 +00004134PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004135{
4136 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004137 for (i = 0; i < UCHAR_MAX + 1; i++) {
4138 Py_XDECREF(characters[i]);
4139 characters[i] = NULL;
4140 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004141 Py_XDECREF(nullstring);
4142 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004143}
Barry Warsawa903ad982001-02-23 16:40:48 +00004144
Barry Warsawa903ad982001-02-23 16:40:48 +00004145void _Py_ReleaseInternedStrings(void)
4146{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004147 PyObject *keys;
4148 PyStringObject *s;
4149 int i, n;
4150
4151 if (interned == NULL || !PyDict_Check(interned))
4152 return;
4153 keys = PyDict_Keys(interned);
4154 if (keys == NULL || !PyList_Check(keys)) {
4155 PyErr_Clear();
4156 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004157 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004158
4159 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4160 detector, interned strings are not forcibly deallocated; rather, we
4161 give them their stolen references back, and then clear and DECREF
4162 the interned dict. */
4163
4164 fprintf(stderr, "releasing interned strings\n");
4165 n = PyList_GET_SIZE(keys);
4166 for (i = 0; i < n; i++) {
4167 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4168 switch (s->ob_sstate) {
4169 case SSTATE_NOT_INTERNED:
4170 /* XXX Shouldn't happen */
4171 break;
4172 case SSTATE_INTERNED_IMMORTAL:
4173 s->ob_refcnt += 1;
4174 break;
4175 case SSTATE_INTERNED_MORTAL:
4176 s->ob_refcnt += 2;
4177 break;
4178 default:
4179 Py_FatalError("Inconsistent interned string state.");
4180 }
4181 s->ob_sstate = SSTATE_NOT_INTERNED;
4182 }
4183 Py_DECREF(keys);
4184 PyDict_Clear(interned);
4185 Py_DECREF(interned);
4186 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004187}