blob: c234b0443baa713f5c02625447e5bf67fc66fe12 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000544 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545#ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
556
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
562
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
572 }
573#else
574 *p++ = *s++;
575#endif
576 continue;
577 }
578 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
583 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
604 }
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
647#ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "Unicode escapes not legal "
654 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#endif
658 default:
659 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
671}
672
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000673static int
674string_getsize(register PyObject *op)
675{
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
681}
682
683static /*const*/ char *
684string_getbuffer(register PyObject *op)
685{
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
691}
692
Guido van Rossumd7047b31995-01-02 19:07:15 +0000693int
Fred Drakeba096332000-07-09 07:04:36 +0000694PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (!PyString_Check(op))
697 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699}
700
701/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709int
710PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
713{
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
717 }
718
719 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
725 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000726 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000727#endif
728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747/* Methods */
748
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000749static int
Fred Drakeba096332000-07-09 07:04:36 +0000750string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
752 int i;
753 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000754 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000755
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000756 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
766 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000769 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771
Thomas Wouters7e474022000-07-16 12:04:32 +0000772 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000774 if (memchr(op->ob_sval, '\'', op->ob_size) &&
775 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000776 quote = '"';
777
778 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 for (i = 0; i < op->ob_size; i++) {
780 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000781 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\r");
789 else if (c < ' ' || c >= 0x7f)
790 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000791 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000796}
797
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798PyObject *
799PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000801 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000802 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
803 PyObject *v;
804 if (newsize > INT_MAX) {
805 PyErr_SetString(PyExc_OverflowError,
806 "string is too large to make repr");
807 }
808 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000810 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 }
812 else {
813 register int i;
814 register char c;
815 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 int quote;
817
Thomas Wouters7e474022000-07-16 12:04:32 +0000818 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000820 if (smartquotes &&
821 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000822 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '"';
824
Tim Peters9161c8b2001-12-03 01:55:38 +0000825 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000828 /* There's at least enough room for a hex escape
829 and a closing quote. */
830 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000833 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000834 else if (c == '\t')
835 *p++ = '\\', *p++ = 't';
836 else if (c == '\n')
837 *p++ = '\\', *p++ = 'n';
838 else if (c == '\r')
839 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000840 else if (c < ' ' || c >= 0x7f) {
841 /* For performance, we don't want to call
842 PyOS_snprintf here (extra layers of
843 function call). */
844 sprintf(p, "\\x%02x", c & 0xff);
845 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 else
848 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000850 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000853 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000855 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857}
858
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000860string_repr(PyObject *op)
861{
862 return PyString_Repr(op, 1);
863}
864
865static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000866string_str(PyObject *s)
867{
Tim Petersc9933152001-10-16 20:18:24 +0000868 assert(PyString_Check(s));
869 if (PyString_CheckExact(s)) {
870 Py_INCREF(s);
871 return s;
872 }
873 else {
874 /* Subtype -- return genuine string with the same value. */
875 PyStringObject *t = (PyStringObject *) s;
876 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
877 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000878}
879
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880static int
Fred Drakeba096332000-07-09 07:04:36 +0000881string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882{
883 return a->ob_size;
884}
885
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000886static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000887string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888{
889 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 register PyStringObject *op;
891 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000892#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000893 if (PyUnicode_Check(bb))
894 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000895#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000896 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000897 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000898 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 return NULL;
900 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000901#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000903 if ((a->ob_size == 0 || b->ob_size == 0) &&
904 PyString_CheckExact(a) && PyString_CheckExact(b)) {
905 if (a->ob_size == 0) {
906 Py_INCREF(bb);
907 return bb;
908 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000909 Py_INCREF(a);
910 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
912 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000913 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000915 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000916 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000917 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000918 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000919 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000920 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000921 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
922 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
923 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925#undef b
926}
927
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000929string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930{
931 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000932 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000933 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000934 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000935 if (n < 0)
936 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000937 /* watch out for overflows: the size can overflow int,
938 * and the # of bytes needed can overflow size_t
939 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000941 if (n && size / n != a->ob_size) {
942 PyErr_SetString(PyExc_OverflowError,
943 "repeated string is too long");
944 return NULL;
945 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000946 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 Py_INCREF(a);
948 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949 }
Tim Peters8f422462000-09-09 06:13:41 +0000950 nbytes = size * sizeof(char);
951 if (nbytes / sizeof(char) != (size_t)size ||
952 nbytes + sizeof(PyStringObject) <= nbytes) {
953 PyErr_SetString(PyExc_OverflowError,
954 "repeated string is too long");
955 return NULL;
956 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000958 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000959 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000960 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000961 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000962 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000963 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 for (i = 0; i < size; i += a->ob_size)
965 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
966 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968}
969
970/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
971
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000973string_slice(register PyStringObject *a, register int i, register int j)
974 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000975{
976 if (i < 0)
977 i = 0;
978 if (j < 0)
979 j = 0; /* Avoid signed/unsigned bug in next line */
980 if (j > a->ob_size)
981 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000982 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
983 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 Py_INCREF(a);
985 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000986 }
987 if (j < i)
988 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990}
991
Guido van Rossum9284a572000-03-07 15:53:43 +0000992static int
Fred Drakeba096332000-07-09 07:04:36 +0000993string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000994{
Barry Warsaw817918c2002-08-06 16:58:21 +0000995 const char *lhs, *rhs, *end;
996 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000997
998 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000999#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001000 if (PyUnicode_Check(el))
1001 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001002#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001003 if (!PyString_Check(el)) {
1004 PyErr_SetString(PyExc_TypeError,
1005 "'in <string>' requires string as left operand");
1006 return -1;
1007 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001008 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001009 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001010 rhs = PyString_AS_STRING(el);
1011 lhs = PyString_AS_STRING(a);
1012
1013 /* optimize for a single character */
1014 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001015 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001016
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001018 while (lhs <= end) {
1019 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001020 return 1;
1021 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001022
Guido van Rossum9284a572000-03-07 15:53:43 +00001023 return 0;
1024}
1025
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001026static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001027string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001029 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001030 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001031 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001032 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001033 return NULL;
1034 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001035 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001036 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001037 if (v == NULL)
1038 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001039 else {
1040#ifdef COUNT_ALLOCS
1041 one_strings++;
1042#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001043 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001044 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001045 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046}
1047
Martin v. Löwiscd353062001-05-24 16:56:35 +00001048static PyObject*
1049string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001051 int c;
1052 int len_a, len_b;
1053 int min_len;
1054 PyObject *result;
1055
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001056 /* Make sure both arguments are strings. */
1057 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001058 result = Py_NotImplemented;
1059 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001060 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001061 if (a == b) {
1062 switch (op) {
1063 case Py_EQ:case Py_LE:case Py_GE:
1064 result = Py_True;
1065 goto out;
1066 case Py_NE:case Py_LT:case Py_GT:
1067 result = Py_False;
1068 goto out;
1069 }
1070 }
1071 if (op == Py_EQ) {
1072 /* Supporting Py_NE here as well does not save
1073 much time, since Py_NE is rarely used. */
1074 if (a->ob_size == b->ob_size
1075 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001076 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001077 a->ob_size) == 0)) {
1078 result = Py_True;
1079 } else {
1080 result = Py_False;
1081 }
1082 goto out;
1083 }
1084 len_a = a->ob_size; len_b = b->ob_size;
1085 min_len = (len_a < len_b) ? len_a : len_b;
1086 if (min_len > 0) {
1087 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1088 if (c==0)
1089 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1090 }else
1091 c = 0;
1092 if (c == 0)
1093 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1094 switch (op) {
1095 case Py_LT: c = c < 0; break;
1096 case Py_LE: c = c <= 0; break;
1097 case Py_EQ: assert(0); break; /* unreachable */
1098 case Py_NE: c = c != 0; break;
1099 case Py_GT: c = c > 0; break;
1100 case Py_GE: c = c >= 0; break;
1101 default:
1102 result = Py_NotImplemented;
1103 goto out;
1104 }
1105 result = c ? Py_True : Py_False;
1106 out:
1107 Py_INCREF(result);
1108 return result;
1109}
1110
1111int
1112_PyString_Eq(PyObject *o1, PyObject *o2)
1113{
1114 PyStringObject *a, *b;
1115 a = (PyStringObject*)o1;
1116 b = (PyStringObject*)o2;
1117 return a->ob_size == b->ob_size
1118 && *a->ob_sval == *b->ob_sval
1119 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001120}
1121
Guido van Rossum9bfef441993-03-29 10:43:31 +00001122static long
Fred Drakeba096332000-07-09 07:04:36 +00001123string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001124{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001125 register int len;
1126 register unsigned char *p;
1127 register long x;
1128
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 if (a->ob_shash != -1)
1130 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001131 len = a->ob_size;
1132 p = (unsigned char *) a->ob_sval;
1133 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001134 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001135 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001136 x ^= a->ob_size;
1137 if (x == -1)
1138 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001139 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001140 return x;
1141}
1142
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001143static PyObject*
1144string_subscript(PyStringObject* self, PyObject* item)
1145{
1146 if (PyInt_Check(item)) {
1147 long i = PyInt_AS_LONG(item);
1148 if (i < 0)
1149 i += PyString_GET_SIZE(self);
1150 return string_item(self,i);
1151 }
1152 else if (PyLong_Check(item)) {
1153 long i = PyLong_AsLong(item);
1154 if (i == -1 && PyErr_Occurred())
1155 return NULL;
1156 if (i < 0)
1157 i += PyString_GET_SIZE(self);
1158 return string_item(self,i);
1159 }
1160 else if (PySlice_Check(item)) {
1161 int start, stop, step, slicelength, cur, i;
1162 char* source_buf;
1163 char* result_buf;
1164 PyObject* result;
1165
1166 if (PySlice_GetIndicesEx((PySliceObject*)item,
1167 PyString_GET_SIZE(self),
1168 &start, &stop, &step, &slicelength) < 0) {
1169 return NULL;
1170 }
1171
1172 if (slicelength <= 0) {
1173 return PyString_FromStringAndSize("", 0);
1174 }
1175 else {
1176 source_buf = PyString_AsString((PyObject*)self);
1177 result_buf = PyMem_Malloc(slicelength);
1178
1179 for (cur = start, i = 0; i < slicelength;
1180 cur += step, i++) {
1181 result_buf[i] = source_buf[cur];
1182 }
1183
1184 result = PyString_FromStringAndSize(result_buf,
1185 slicelength);
1186 PyMem_Free(result_buf);
1187 return result;
1188 }
1189 }
1190 else {
1191 PyErr_SetString(PyExc_TypeError,
1192 "string indices must be integers");
1193 return NULL;
1194 }
1195}
1196
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001197static int
Fred Drakeba096332000-07-09 07:04:36 +00001198string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001199{
1200 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001201 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001202 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001203 return -1;
1204 }
1205 *ptr = (void *)self->ob_sval;
1206 return self->ob_size;
1207}
1208
1209static int
Fred Drakeba096332000-07-09 07:04:36 +00001210string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001211{
Guido van Rossum045e6881997-09-08 18:30:11 +00001212 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001213 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001214 return -1;
1215}
1216
1217static int
Fred Drakeba096332000-07-09 07:04:36 +00001218string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001219{
1220 if ( lenp )
1221 *lenp = self->ob_size;
1222 return 1;
1223}
1224
Guido van Rossum1db70701998-10-08 02:18:52 +00001225static int
Fred Drakeba096332000-07-09 07:04:36 +00001226string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001227{
1228 if ( index != 0 ) {
1229 PyErr_SetString(PyExc_SystemError,
1230 "accessing non-existent string segment");
1231 return -1;
1232 }
1233 *ptr = self->ob_sval;
1234 return self->ob_size;
1235}
1236
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001237static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001238 (inquiry)string_length, /*sq_length*/
1239 (binaryfunc)string_concat, /*sq_concat*/
1240 (intargfunc)string_repeat, /*sq_repeat*/
1241 (intargfunc)string_item, /*sq_item*/
1242 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001243 0, /*sq_ass_item*/
1244 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001245 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001246};
1247
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001248static PyMappingMethods string_as_mapping = {
1249 (inquiry)string_length,
1250 (binaryfunc)string_subscript,
1251 0,
1252};
1253
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254static PyBufferProcs string_as_buffer = {
1255 (getreadbufferproc)string_buffer_getreadbuf,
1256 (getwritebufferproc)string_buffer_getwritebuf,
1257 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001258 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001259};
1260
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261
1262
/* Strip modes; they double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format string for each strip mode, indexed by the constants above. */
static const char *stripformat[] = {
	"|O:lstrip",
	"|O:rstrip",
	"|O:strip"
};

/* Method name for a strip mode: the format string minus its leading
   three characters ("|O:"). */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001271
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272
1273static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001274split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001275{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001277 PyObject* item;
1278 PyObject *list = PyList_New(0);
1279
1280 if (list == NULL)
1281 return NULL;
1282
Guido van Rossum4c08d552000-03-10 22:55:18 +00001283 for (i = j = 0; i < len; ) {
1284 while (i < len && isspace(Py_CHARMASK(s[i])))
1285 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001287 while (i < len && !isspace(Py_CHARMASK(s[i])))
1288 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 if (maxsplit-- <= 0)
1291 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1293 if (item == NULL)
1294 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295 err = PyList_Append(list, item);
1296 Py_DECREF(item);
1297 if (err < 0)
1298 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001299 while (i < len && isspace(Py_CHARMASK(s[i])))
1300 i++;
1301 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 }
1303 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 if (j < len) {
1305 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1306 if (item == NULL)
1307 goto finally;
1308 err = PyList_Append(list, item);
1309 Py_DECREF(item);
1310 if (err < 0)
1311 goto finally;
1312 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313 return list;
1314 finally:
1315 Py_DECREF(list);
1316 return NULL;
1317}
1318
1319
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001320PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321"S.split([sep [,maxsplit]]) -> list of strings\n\
1322\n\
1323Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001324delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001325splits are done. If sep is not specified or is None, any\n\
1326whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327
1328static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001329string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330{
1331 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 int maxsplit = -1;
1333 const char *s = PyString_AS_STRING(self), *sub;
1334 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001338 if (maxsplit < 0)
1339 maxsplit = INT_MAX;
1340 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001342 if (PyString_Check(subobj)) {
1343 sub = PyString_AS_STRING(subobj);
1344 n = PyString_GET_SIZE(subobj);
1345 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001346#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001347 else if (PyUnicode_Check(subobj))
1348 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001349#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001350 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1351 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352 if (n == 0) {
1353 PyErr_SetString(PyExc_ValueError, "empty separator");
1354 return NULL;
1355 }
1356
1357 list = PyList_New(0);
1358 if (list == NULL)
1359 return NULL;
1360
1361 i = j = 0;
1362 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001363 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001364 if (maxsplit-- <= 0)
1365 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001366 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1367 if (item == NULL)
1368 goto fail;
1369 err = PyList_Append(list, item);
1370 Py_DECREF(item);
1371 if (err < 0)
1372 goto fail;
1373 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374 }
1375 else
1376 i++;
1377 }
1378 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1379 if (item == NULL)
1380 goto fail;
1381 err = PyList_Append(list, item);
1382 Py_DECREF(item);
1383 if (err < 0)
1384 goto fail;
1385
1386 return list;
1387
1388 fail:
1389 Py_DECREF(list);
1390 return NULL;
1391}
1392
1393
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001394PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001395"S.join(sequence) -> string\n\
1396\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001397Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001398sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399
1400static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001401string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402{
1403 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001404 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406 char *p;
1407 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001408 size_t sz = 0;
1409 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001410 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411
Tim Peters19fe14e2001-01-19 03:03:47 +00001412 seq = PySequence_Fast(orig, "");
1413 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001414 if (PyErr_ExceptionMatches(PyExc_TypeError))
1415 PyErr_Format(PyExc_TypeError,
1416 "sequence expected, %.80s found",
1417 orig->ob_type->tp_name);
1418 return NULL;
1419 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001420
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001421 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001422 if (seqlen == 0) {
1423 Py_DECREF(seq);
1424 return PyString_FromString("");
1425 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001427 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001428 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1429 PyErr_Format(PyExc_TypeError,
1430 "sequence item 0: expected string,"
1431 " %.80s found",
1432 item->ob_type->tp_name);
1433 Py_DECREF(seq);
1434 return NULL;
1435 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001436 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001437 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001438 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001440
Tim Peters19fe14e2001-01-19 03:03:47 +00001441 /* There are at least two things to join. Do a pre-pass to figure out
1442 * the total amount of space we'll need (sz), see whether any argument
1443 * is absurd, and defer to the Unicode join if appropriate.
1444 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001445 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001446 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001447 item = PySequence_Fast_GET_ITEM(seq, i);
1448 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001449#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001450 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001451 /* Defer to Unicode join.
1452 * CAUTION: There's no gurantee that the
1453 * original sequence can be iterated over
1454 * again, so we must pass seq here.
1455 */
1456 PyObject *result;
1457 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001458 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001459 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001460 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001461#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001463 "sequence item %i: expected string,"
1464 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001465 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001466 Py_DECREF(seq);
1467 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001468 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001469 sz += PyString_GET_SIZE(item);
1470 if (i != 0)
1471 sz += seplen;
1472 if (sz < old_sz || sz > INT_MAX) {
1473 PyErr_SetString(PyExc_OverflowError,
1474 "join() is too long for a Python string");
1475 Py_DECREF(seq);
1476 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001478 }
1479
1480 /* Allocate result space. */
1481 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1482 if (res == NULL) {
1483 Py_DECREF(seq);
1484 return NULL;
1485 }
1486
1487 /* Catenate everything. */
1488 p = PyString_AS_STRING(res);
1489 for (i = 0; i < seqlen; ++i) {
1490 size_t n;
1491 item = PySequence_Fast_GET_ITEM(seq, i);
1492 n = PyString_GET_SIZE(item);
1493 memcpy(p, PyString_AS_STRING(item), n);
1494 p += n;
1495 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001496 memcpy(p, sep, seplen);
1497 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001498 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001500
Jeremy Hylton49048292000-07-11 03:28:17 +00001501 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503}
1504
Tim Peters52e155e2001-06-16 05:42:57 +00001505PyObject *
1506_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001507{
Tim Petersa7259592001-06-16 05:11:17 +00001508 assert(sep != NULL && PyString_Check(sep));
1509 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001510 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001511}
1512
/* Normalize a start/end index pair in place for a sequence of length
   `len`.  *end is capped at len; a negative *end or *start has len added
   to it, and is set to 0 if it is still negative afterwards.  *start is
   not capped at len. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int hi = *end;
	int lo;

	if (hi > len)
		hi = len;
	else if (hi < 0)
		hi += len;
	if (hi < 0)
		hi = 0;
	*end = hi;

	lo = *start;
	if (lo < 0) {
		lo += len;
		if (lo < 0)
			lo = 0;
	}
	*start = lo;
}
1527
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528static long
Fred Drakeba096332000-07-09 07:04:36 +00001529string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001531 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532 int len = PyString_GET_SIZE(self);
1533 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001534 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001535
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001536 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001537 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001538 return -2;
1539 if (PyString_Check(subobj)) {
1540 sub = PyString_AS_STRING(subobj);
1541 n = PyString_GET_SIZE(subobj);
1542 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001543#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001544 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001545 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001546#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548 return -2;
1549
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001550 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 if (dir > 0) {
1553 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001554 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 last -= n;
1556 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001557 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001558 return (long)i;
1559 }
1560 else {
1561 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001562
Guido van Rossum4c08d552000-03-10 22:55:18 +00001563 if (n == 0 && i <= last)
1564 return (long)last;
1565 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001566 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 return (long)j;
1568 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001569
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 return -1;
1571}
1572
1573
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001574PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575"S.find(sub [,start [,end]]) -> int\n\
1576\n\
1577Return the lowest index in S where substring sub is found,\n\
1578such that sub is contained within s[start,end]. Optional\n\
1579arguments start and end are interpreted as in slice notation.\n\
1580\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001581Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582
1583static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001584string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001586 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 if (result == -2)
1588 return NULL;
1589 return PyInt_FromLong(result);
1590}
1591
1592
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001593PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594"S.index(sub [,start [,end]]) -> int\n\
1595\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001596Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597
1598static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001599string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001601 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 if (result == -2)
1603 return NULL;
1604 if (result == -1) {
1605 PyErr_SetString(PyExc_ValueError,
1606 "substring not found in string.index");
1607 return NULL;
1608 }
1609 return PyInt_FromLong(result);
1610}
1611
1612
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001613PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614"S.rfind(sub [,start [,end]]) -> int\n\
1615\n\
1616Return the highest index in S where substring sub is found,\n\
1617such that sub is contained within s[start,end]. Optional\n\
1618arguments start and end are interpreted as in slice notation.\n\
1619\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001620Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621
1622static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001623string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001625 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 if (result == -2)
1627 return NULL;
1628 return PyInt_FromLong(result);
1629}
1630
1631
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001632PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633"S.rindex(sub [,start [,end]]) -> int\n\
1634\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001635Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636
1637static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001638string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001640 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 if (result == -2)
1642 return NULL;
1643 if (result == -1) {
1644 PyErr_SetString(PyExc_ValueError,
1645 "substring not found in string.rindex");
1646 return NULL;
1647 }
1648 return PyInt_FromLong(result);
1649}
1650
1651
1652static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001653do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1654{
1655 char *s = PyString_AS_STRING(self);
1656 int len = PyString_GET_SIZE(self);
1657 char *sep = PyString_AS_STRING(sepobj);
1658 int seplen = PyString_GET_SIZE(sepobj);
1659 int i, j;
1660
1661 i = 0;
1662 if (striptype != RIGHTSTRIP) {
1663 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1664 i++;
1665 }
1666 }
1667
1668 j = len;
1669 if (striptype != LEFTSTRIP) {
1670 do {
1671 j--;
1672 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1673 j++;
1674 }
1675
1676 if (i == 0 && j == len && PyString_CheckExact(self)) {
1677 Py_INCREF(self);
1678 return (PyObject*)self;
1679 }
1680 else
1681 return PyString_FromStringAndSize(s+i, j-i);
1682}
1683
1684
1685static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001686do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687{
1688 char *s = PyString_AS_STRING(self);
1689 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691 i = 0;
1692 if (striptype != RIGHTSTRIP) {
1693 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1694 i++;
1695 }
1696 }
1697
1698 j = len;
1699 if (striptype != LEFTSTRIP) {
1700 do {
1701 j--;
1702 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1703 j++;
1704 }
1705
Tim Peters8fa5dd02001-09-12 02:18:30 +00001706 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707 Py_INCREF(self);
1708 return (PyObject*)self;
1709 }
1710 else
1711 return PyString_FromStringAndSize(s+i, j-i);
1712}
1713
1714
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001715static PyObject *
1716do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1717{
1718 PyObject *sep = NULL;
1719
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001720 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001721 return NULL;
1722
1723 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001724 if (PyString_Check(sep))
1725 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001726#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001727 else if (PyUnicode_Check(sep)) {
1728 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1729 PyObject *res;
1730 if (uniself==NULL)
1731 return NULL;
1732 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1733 striptype, sep);
1734 Py_DECREF(uniself);
1735 return res;
1736 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001737#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001738 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001739 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001740#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001741 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001742#else
1743 "%s arg must be None or str",
1744#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001745 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001746 return NULL;
1747 }
1748 return do_xstrip(self, striptype, sep);
1749 }
1750
1751 return do_strip(self, striptype);
1752}
1753
1754
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001755PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001756"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757\n\
1758Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001759whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001760If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001761If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762
1763static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001764string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001766 if (PyTuple_GET_SIZE(args) == 0)
1767 return do_strip(self, BOTHSTRIP); /* Common case */
1768 else
1769 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770}
1771
1772
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001773PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001774"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001776Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001777If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001778If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779
1780static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001781string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001783 if (PyTuple_GET_SIZE(args) == 0)
1784 return do_strip(self, LEFTSTRIP); /* Common case */
1785 else
1786 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001791"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001793Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001794If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001798string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001800 if (PyTuple_GET_SIZE(args) == 0)
1801 return do_strip(self, RIGHTSTRIP); /* Common case */
1802 else
1803 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804}
1805
1806
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001807PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808"S.lower() -> string\n\
1809\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001810Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811
1812static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001813string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814{
1815 char *s = PyString_AS_STRING(self), *s_new;
1816 int i, n = PyString_GET_SIZE(self);
1817 PyObject *new;
1818
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 new = PyString_FromStringAndSize(NULL, n);
1820 if (new == NULL)
1821 return NULL;
1822 s_new = PyString_AsString(new);
1823 for (i = 0; i < n; i++) {
1824 int c = Py_CHARMASK(*s++);
1825 if (isupper(c)) {
1826 *s_new = tolower(c);
1827 } else
1828 *s_new = c;
1829 s_new++;
1830 }
1831 return new;
1832}
1833
1834
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001835PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836"S.upper() -> string\n\
1837\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001838Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839
1840static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001841string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842{
1843 char *s = PyString_AS_STRING(self), *s_new;
1844 int i, n = PyString_GET_SIZE(self);
1845 PyObject *new;
1846
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 new = PyString_FromStringAndSize(NULL, n);
1848 if (new == NULL)
1849 return NULL;
1850 s_new = PyString_AsString(new);
1851 for (i = 0; i < n; i++) {
1852 int c = Py_CHARMASK(*s++);
1853 if (islower(c)) {
1854 *s_new = toupper(c);
1855 } else
1856 *s_new = c;
1857 s_new++;
1858 }
1859 return new;
1860}
1861
1862
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001863PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864"S.title() -> string\n\
1865\n\
1866Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001867characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868
1869static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001870string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871{
1872 char *s = PyString_AS_STRING(self), *s_new;
1873 int i, n = PyString_GET_SIZE(self);
1874 int previous_is_cased = 0;
1875 PyObject *new;
1876
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877 new = PyString_FromStringAndSize(NULL, n);
1878 if (new == NULL)
1879 return NULL;
1880 s_new = PyString_AsString(new);
1881 for (i = 0; i < n; i++) {
1882 int c = Py_CHARMASK(*s++);
1883 if (islower(c)) {
1884 if (!previous_is_cased)
1885 c = toupper(c);
1886 previous_is_cased = 1;
1887 } else if (isupper(c)) {
1888 if (previous_is_cased)
1889 c = tolower(c);
1890 previous_is_cased = 1;
1891 } else
1892 previous_is_cased = 0;
1893 *s_new++ = c;
1894 }
1895 return new;
1896}
1897
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001898PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899"S.capitalize() -> string\n\
1900\n\
1901Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001902capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903
1904static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001905string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906{
1907 char *s = PyString_AS_STRING(self), *s_new;
1908 int i, n = PyString_GET_SIZE(self);
1909 PyObject *new;
1910
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 new = PyString_FromStringAndSize(NULL, n);
1912 if (new == NULL)
1913 return NULL;
1914 s_new = PyString_AsString(new);
1915 if (0 < n) {
1916 int c = Py_CHARMASK(*s++);
1917 if (islower(c))
1918 *s_new = toupper(c);
1919 else
1920 *s_new = c;
1921 s_new++;
1922 }
1923 for (i = 1; i < n; i++) {
1924 int c = Py_CHARMASK(*s++);
1925 if (isupper(c))
1926 *s_new = tolower(c);
1927 else
1928 *s_new = c;
1929 s_new++;
1930 }
1931 return new;
1932}
1933
1934
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001935PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936"S.count(sub[, start[, end]]) -> int\n\
1937\n\
1938Return the number of occurrences of substring sub in string\n\
1939S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001940interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941
1942static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001943string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001945 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946 int len = PyString_GET_SIZE(self), n;
1947 int i = 0, last = INT_MAX;
1948 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950
Guido van Rossumc6821402000-05-08 14:08:05 +00001951 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1952 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001954
Guido van Rossum4c08d552000-03-10 22:55:18 +00001955 if (PyString_Check(subobj)) {
1956 sub = PyString_AS_STRING(subobj);
1957 n = PyString_GET_SIZE(subobj);
1958 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001959#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001960 else if (PyUnicode_Check(subobj)) {
1961 int count;
1962 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1963 if (count == -1)
1964 return NULL;
1965 else
1966 return PyInt_FromLong((long) count);
1967 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001968#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001969 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1970 return NULL;
1971
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001972 string_adjust_indices(&i, &last, len);
1973
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974 m = last + 1 - n;
1975 if (n == 0)
1976 return PyInt_FromLong((long) (m-i));
1977
1978 r = 0;
1979 while (i < m) {
1980 if (!memcmp(s+i, sub, n)) {
1981 r++;
1982 i += n;
1983 } else {
1984 i++;
1985 }
1986 }
1987 return PyInt_FromLong((long) r);
1988}
1989
1990
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001991PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992"S.swapcase() -> string\n\
1993\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001994Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001995converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996
1997static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001998string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999{
2000 char *s = PyString_AS_STRING(self), *s_new;
2001 int i, n = PyString_GET_SIZE(self);
2002 PyObject *new;
2003
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004 new = PyString_FromStringAndSize(NULL, n);
2005 if (new == NULL)
2006 return NULL;
2007 s_new = PyString_AsString(new);
2008 for (i = 0; i < n; i++) {
2009 int c = Py_CHARMASK(*s++);
2010 if (islower(c)) {
2011 *s_new = toupper(c);
2012 }
2013 else if (isupper(c)) {
2014 *s_new = tolower(c);
2015 }
2016 else
2017 *s_new = c;
2018 s_new++;
2019 }
2020 return new;
2021}
2022
2023
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002024PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025"S.translate(table [,deletechars]) -> string\n\
2026\n\
2027Return a copy of the string S, where all characters occurring\n\
2028in the optional argument deletechars are removed, and the\n\
2029remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002030translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031
2032static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002033string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002035 register char *input, *output;
2036 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 register int i, c, changed = 0;
2038 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 int inlen, tablen, dellen = 0;
2041 PyObject *result;
2042 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044
Guido van Rossum4c08d552000-03-10 22:55:18 +00002045 if (!PyArg_ParseTuple(args, "O|O:translate",
2046 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048
2049 if (PyString_Check(tableobj)) {
2050 table1 = PyString_AS_STRING(tableobj);
2051 tablen = PyString_GET_SIZE(tableobj);
2052 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002053#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002054 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002055 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056 parameter; instead a mapping to None will cause characters
2057 to be deleted. */
2058 if (delobj != NULL) {
2059 PyErr_SetString(PyExc_TypeError,
2060 "deletions are implemented differently for unicode");
2061 return NULL;
2062 }
2063 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2064 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002065#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002066 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002068
2069 if (delobj != NULL) {
2070 if (PyString_Check(delobj)) {
2071 del_table = PyString_AS_STRING(delobj);
2072 dellen = PyString_GET_SIZE(delobj);
2073 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002074#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002075 else if (PyUnicode_Check(delobj)) {
2076 PyErr_SetString(PyExc_TypeError,
2077 "deletions are implemented differently for unicode");
2078 return NULL;
2079 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002080#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002081 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2082 return NULL;
2083
2084 if (tablen != 256) {
2085 PyErr_SetString(PyExc_ValueError,
2086 "translation table must be 256 characters long");
2087 return NULL;
2088 }
2089 }
2090 else {
2091 del_table = NULL;
2092 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093 }
2094
2095 table = table1;
2096 inlen = PyString_Size(input_obj);
2097 result = PyString_FromStringAndSize((char *)NULL, inlen);
2098 if (result == NULL)
2099 return NULL;
2100 output_start = output = PyString_AsString(result);
2101 input = PyString_AsString(input_obj);
2102
2103 if (dellen == 0) {
2104 /* If no deletions are required, use faster code */
2105 for (i = inlen; --i >= 0; ) {
2106 c = Py_CHARMASK(*input++);
2107 if (Py_CHARMASK((*output++ = table[c])) != c)
2108 changed = 1;
2109 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002110 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111 return result;
2112 Py_DECREF(result);
2113 Py_INCREF(input_obj);
2114 return input_obj;
2115 }
2116
2117 for (i = 0; i < 256; i++)
2118 trans_table[i] = Py_CHARMASK(table[i]);
2119
2120 for (i = 0; i < dellen; i++)
2121 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2122
2123 for (i = inlen; --i >= 0; ) {
2124 c = Py_CHARMASK(*input++);
2125 if (trans_table[c] != -1)
2126 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2127 continue;
2128 changed = 1;
2129 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002130 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131 Py_DECREF(result);
2132 Py_INCREF(input_obj);
2133 return input_obj;
2134 }
2135 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002136 if (inlen > 0)
2137 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138 return result;
2139}
2140
2141
2142/* What follows is used for implementing replace(). Perry Stoll. */
2143
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none.  If PAT_LEN is greater than LEN the result is -1.
  PAT[0] is read whenever at least one start position is tried.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot start within the last pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
2169
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM, found
  by repeatedly calling mymemfind() and resuming just past each match.
  E.g. mem=1111 and pat=11 gives 2; mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	for (;;) {
		int at;

		if (len < 0)
			break;
		at = mymemfind(mem, len, pat, pat_len);
		if (at == -1)
			break;
		/* Continue the search right after this match. */
		mem += at + pat_len;
		len -= at + pat_len;
		hits++;
	}
	return hits;
}
2193
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "no limit".  An empty PAT matches before every byte of STR
   and once after the last byte.

   If STR is empty, if PAT and SUB are both empty, if the length of PAT
   is greater than the length of STR, or if there are no occurrences of
   PAT in STR, then the original string is returned.  Otherwise, a new
   string is allocated here (with PyMem_MALLOC) and returned; the caller
   must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth per replacement.  Only a positive delta can overflow:
	   when the result shrinks, nfound*pat_len never exceeds len. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;
	new_len = len + nfound * delta;

	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* Empty pattern: emit SUB, then one input byte,
			   until the replacement budget is used up; the
			   rest of the input is then copied verbatim. */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2291
2292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002293PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294"S.replace (old, new[, maxsplit]) -> string\n\
2295\n\
2296Return a copy of string S with all occurrences of substring\n\
2297old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002298given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299
2300static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002301string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 const char *str = PyString_AS_STRING(self), *sub, *repl;
2304 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002305 const int len = PyString_GET_SIZE(self);
2306 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311 if (!PyArg_ParseTuple(args, "OO|i:replace",
2312 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314
2315 if (PyString_Check(subobj)) {
2316 sub = PyString_AS_STRING(subobj);
2317 sub_len = PyString_GET_SIZE(subobj);
2318 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002319#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002321 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002323#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2325 return NULL;
2326
2327 if (PyString_Check(replobj)) {
2328 repl = PyString_AS_STRING(replobj);
2329 repl_len = PyString_GET_SIZE(replobj);
2330 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002331#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002332 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002333 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002334 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002335#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2337 return NULL;
2338
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 if (new_s == NULL) {
2341 PyErr_NoMemory();
2342 return NULL;
2343 }
2344 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002345 if (PyString_CheckExact(self)) {
2346 /* we're returning another reference to self */
2347 new = (PyObject*)self;
2348 Py_INCREF(new);
2349 }
2350 else {
2351 new = PyString_FromStringAndSize(str, len);
2352 if (new == NULL)
2353 return NULL;
2354 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355 }
2356 else {
2357 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002358 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359 }
2360 return new;
2361}
2362
2363
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002364PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002365"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002367Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002369comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370
2371static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002372string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002375 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377 int plen;
2378 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002379 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381
Guido van Rossumc6821402000-05-08 14:08:05 +00002382 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2383 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 return NULL;
2385 if (PyString_Check(subobj)) {
2386 prefix = PyString_AS_STRING(subobj);
2387 plen = PyString_GET_SIZE(subobj);
2388 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002389#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002390 else if (PyUnicode_Check(subobj)) {
2391 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002392 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002393 subobj, start, end, -1);
2394 if (rc == -1)
2395 return NULL;
2396 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002397 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002398 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002399#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401 return NULL;
2402
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002403 string_adjust_indices(&start, &end, len);
2404
2405 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002406 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002408 if (end-start >= plen)
2409 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2410 else
2411 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412}
2413
2414
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002415PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002416"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002418Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002420comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421
2422static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002423string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427 const char* suffix;
2428 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002430 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002431 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432
Guido van Rossumc6821402000-05-08 14:08:05 +00002433 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2434 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 return NULL;
2436 if (PyString_Check(subobj)) {
2437 suffix = PyString_AS_STRING(subobj);
2438 slen = PyString_GET_SIZE(subobj);
2439 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002440#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002441 else if (PyUnicode_Check(subobj)) {
2442 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002443 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002444 subobj, start, end, +1);
2445 if (rc == -1)
2446 return NULL;
2447 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002448 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002449 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002450#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002451 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002452 return NULL;
2453
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002454 string_adjust_indices(&start, &end, len);
2455
2456 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002457 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002458
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002459 if (end-slen > start)
2460 start = end - slen;
2461 if (end-start >= slen)
2462 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2463 else
2464 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002465}
2466
2467
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002468PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002469"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002470\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002471Encodes S using the codec registered for encoding. encoding defaults\n\
2472to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002473handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002474a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2475'xmlcharrefreplace' as well as any other name registered with\n\
2476codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002477
2478static PyObject *
2479string_encode(PyStringObject *self, PyObject *args)
2480{
2481 char *encoding = NULL;
2482 char *errors = NULL;
2483 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2484 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002485 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2486}
2487
2488
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002489PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002490"S.decode([encoding[,errors]]) -> object\n\
2491\n\
2492Decodes S using the codec registered for encoding. encoding defaults\n\
2493to the default encoding. errors may be given to set a different error\n\
2494handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002495a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2496as well as any other name registerd with codecs.register_error that is\n\
2497able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002498
2499static PyObject *
2500string_decode(PyStringObject *self, PyObject *args)
2501{
2502 char *encoding = NULL;
2503 char *errors = NULL;
2504 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2505 return NULL;
2506 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002507}
2508
2509
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002510PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002511"S.expandtabs([tabsize]) -> string\n\
2512\n\
2513Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002514If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515
2516static PyObject*
2517string_expandtabs(PyStringObject *self, PyObject *args)
2518{
2519 const char *e, *p;
2520 char *q;
2521 int i, j;
2522 PyObject *u;
2523 int tabsize = 8;
2524
2525 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2526 return NULL;
2527
Thomas Wouters7e474022000-07-16 12:04:32 +00002528 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 i = j = 0;
2530 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2531 for (p = PyString_AS_STRING(self); p < e; p++)
2532 if (*p == '\t') {
2533 if (tabsize > 0)
2534 j += tabsize - (j % tabsize);
2535 }
2536 else {
2537 j++;
2538 if (*p == '\n' || *p == '\r') {
2539 i += j;
2540 j = 0;
2541 }
2542 }
2543
2544 /* Second pass: create output string and fill it */
2545 u = PyString_FromStringAndSize(NULL, i + j);
2546 if (!u)
2547 return NULL;
2548
2549 j = 0;
2550 q = PyString_AS_STRING(u);
2551
2552 for (p = PyString_AS_STRING(self); p < e; p++)
2553 if (*p == '\t') {
2554 if (tabsize > 0) {
2555 i = tabsize - (j % tabsize);
2556 j += i;
2557 while (i--)
2558 *q++ = ' ';
2559 }
2560 }
2561 else {
2562 j++;
2563 *q++ = *p;
2564 if (*p == '\n' || *p == '\r')
2565 j = 0;
2566 }
2567
2568 return u;
2569}
2570
Tim Peters8fa5dd02001-09-12 02:18:30 +00002571static PyObject *
2572pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573{
2574 PyObject *u;
2575
2576 if (left < 0)
2577 left = 0;
2578 if (right < 0)
2579 right = 0;
2580
Tim Peters8fa5dd02001-09-12 02:18:30 +00002581 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582 Py_INCREF(self);
2583 return (PyObject *)self;
2584 }
2585
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002586 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002587 left + PyString_GET_SIZE(self) + right);
2588 if (u) {
2589 if (left)
2590 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002591 memcpy(PyString_AS_STRING(u) + left,
2592 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 PyString_GET_SIZE(self));
2594 if (right)
2595 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2596 fill, right);
2597 }
2598
2599 return u;
2600}
2601
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002602PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002603"S.ljust(width) -> string\n"
2604"\n"
2605"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002606"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002607
2608static PyObject *
2609string_ljust(PyStringObject *self, PyObject *args)
2610{
2611 int width;
2612 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2613 return NULL;
2614
Tim Peters8fa5dd02001-09-12 02:18:30 +00002615 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002616 Py_INCREF(self);
2617 return (PyObject*) self;
2618 }
2619
2620 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2621}
2622
2623
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002624PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002625"S.rjust(width) -> string\n"
2626"\n"
2627"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002628"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002629
2630static PyObject *
2631string_rjust(PyStringObject *self, PyObject *args)
2632{
2633 int width;
2634 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2635 return NULL;
2636
Tim Peters8fa5dd02001-09-12 02:18:30 +00002637 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002638 Py_INCREF(self);
2639 return (PyObject*) self;
2640 }
2641
2642 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2643}
2644
2645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002646PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002647"S.center(width) -> string\n"
2648"\n"
2649"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002650"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002651
2652static PyObject *
2653string_center(PyStringObject *self, PyObject *args)
2654{
2655 int marg, left;
2656 int width;
2657
2658 if (!PyArg_ParseTuple(args, "i:center", &width))
2659 return NULL;
2660
Tim Peters8fa5dd02001-09-12 02:18:30 +00002661 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002662 Py_INCREF(self);
2663 return (PyObject*) self;
2664 }
2665
2666 marg = width - PyString_GET_SIZE(self);
2667 left = marg / 2 + (marg & width & 1);
2668
2669 return pad(self, left, marg - left, ' ');
2670}
2671
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002672PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002673"S.zfill(width) -> string\n"
2674"\n"
2675"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002676"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002677
2678static PyObject *
2679string_zfill(PyStringObject *self, PyObject *args)
2680{
2681 int fill;
2682 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002683 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002684
2685 int width;
2686 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2687 return NULL;
2688
2689 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002690 if (PyString_CheckExact(self)) {
2691 Py_INCREF(self);
2692 return (PyObject*) self;
2693 }
2694 else
2695 return PyString_FromStringAndSize(
2696 PyString_AS_STRING(self),
2697 PyString_GET_SIZE(self)
2698 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002699 }
2700
2701 fill = width - PyString_GET_SIZE(self);
2702
2703 s = pad(self, fill, 0, '0');
2704
2705 if (s == NULL)
2706 return NULL;
2707
2708 p = PyString_AS_STRING(s);
2709 if (p[fill] == '+' || p[fill] == '-') {
2710 /* move sign to beginning of string */
2711 p[0] = p[fill];
2712 p[fill] = '0';
2713 }
2714
2715 return (PyObject*) s;
2716}
2717
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002718PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002719"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002720"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002721"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002722"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002723
2724static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002725string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002726{
Fred Drakeba096332000-07-09 07:04:36 +00002727 register const unsigned char *p
2728 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002729 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002730
Guido van Rossum4c08d552000-03-10 22:55:18 +00002731 /* Shortcut for single character strings */
2732 if (PyString_GET_SIZE(self) == 1 &&
2733 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002734 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002735
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002736 /* Special case for empty strings */
2737 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002738 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002739
Guido van Rossum4c08d552000-03-10 22:55:18 +00002740 e = p + PyString_GET_SIZE(self);
2741 for (; p < e; p++) {
2742 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002743 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002744 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002745 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002746}
2747
2748
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002749PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002750"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002751\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002752Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002753and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002754
2755static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002756string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002757{
Fred Drakeba096332000-07-09 07:04:36 +00002758 register const unsigned char *p
2759 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002760 register const unsigned char *e;
2761
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002762 /* Shortcut for single character strings */
2763 if (PyString_GET_SIZE(self) == 1 &&
2764 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002765 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002766
2767 /* Special case for empty strings */
2768 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002769 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002770
2771 e = p + PyString_GET_SIZE(self);
2772 for (; p < e; p++) {
2773 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002774 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002775 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002776 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002777}
2778
2779
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002780PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002781"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002782\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002783Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002784and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002785
2786static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002787string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002788{
Fred Drakeba096332000-07-09 07:04:36 +00002789 register const unsigned char *p
2790 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002791 register const unsigned char *e;
2792
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002793 /* Shortcut for single character strings */
2794 if (PyString_GET_SIZE(self) == 1 &&
2795 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002796 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002797
2798 /* Special case for empty strings */
2799 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002800 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002801
2802 e = p + PyString_GET_SIZE(self);
2803 for (; p < e; p++) {
2804 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002805 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002806 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002807 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002808}
2809
2810
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002811PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002812"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002813\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002814Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002815False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002816
2817static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002818string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002819{
Fred Drakeba096332000-07-09 07:04:36 +00002820 register const unsigned char *p
2821 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002822 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002823
Guido van Rossum4c08d552000-03-10 22:55:18 +00002824 /* Shortcut for single character strings */
2825 if (PyString_GET_SIZE(self) == 1 &&
2826 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002827 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002828
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002829 /* Special case for empty strings */
2830 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002831 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002832
Guido van Rossum4c08d552000-03-10 22:55:18 +00002833 e = p + PyString_GET_SIZE(self);
2834 for (; p < e; p++) {
2835 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002836 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002837 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002838 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002839}
2840
2841
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002842PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002843"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002844\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002845Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002846at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002847
2848static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002849string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002850{
Fred Drakeba096332000-07-09 07:04:36 +00002851 register const unsigned char *p
2852 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002853 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002854 int cased;
2855
Guido van Rossum4c08d552000-03-10 22:55:18 +00002856 /* Shortcut for single character strings */
2857 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002858 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002859
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002860 /* Special case for empty strings */
2861 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002862 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002863
Guido van Rossum4c08d552000-03-10 22:55:18 +00002864 e = p + PyString_GET_SIZE(self);
2865 cased = 0;
2866 for (; p < e; p++) {
2867 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002868 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869 else if (!cased && islower(*p))
2870 cased = 1;
2871 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002872 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002873}
2874
2875
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002876PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002877"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002878\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002879Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002880at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881
2882static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002883string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002884{
Fred Drakeba096332000-07-09 07:04:36 +00002885 register const unsigned char *p
2886 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002887 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002888 int cased;
2889
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890 /* Shortcut for single character strings */
2891 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002892 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002894 /* Special case for empty strings */
2895 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002896 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002897
Guido van Rossum4c08d552000-03-10 22:55:18 +00002898 e = p + PyString_GET_SIZE(self);
2899 cased = 0;
2900 for (; p < e; p++) {
2901 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002902 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002903 else if (!cased && isupper(*p))
2904 cased = 1;
2905 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002906 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907}
2908
2909
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002910PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002911"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002912\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002913Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002914may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002915ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916
2917static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002918string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002919{
Fred Drakeba096332000-07-09 07:04:36 +00002920 register const unsigned char *p
2921 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002922 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002923 int cased, previous_is_cased;
2924
Guido van Rossum4c08d552000-03-10 22:55:18 +00002925 /* Shortcut for single character strings */
2926 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002927 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002928
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002929 /* Special case for empty strings */
2930 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002931 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002932
Guido van Rossum4c08d552000-03-10 22:55:18 +00002933 e = p + PyString_GET_SIZE(self);
2934 cased = 0;
2935 previous_is_cased = 0;
2936 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002937 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002938
2939 if (isupper(ch)) {
2940 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002941 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942 previous_is_cased = 1;
2943 cased = 1;
2944 }
2945 else if (islower(ch)) {
2946 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002947 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002948 previous_is_cased = 1;
2949 cased = 1;
2950 }
2951 else
2952 previous_is_cased = 0;
2953 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002954 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002955}
2956
2957
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002958PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002959"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002960\n\
2961Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002962Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002963is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002964
2965#define SPLIT_APPEND(data, left, right) \
2966 str = PyString_FromStringAndSize(data + left, right - left); \
2967 if (!str) \
2968 goto onError; \
2969 if (PyList_Append(list, str)) { \
2970 Py_DECREF(str); \
2971 goto onError; \
2972 } \
2973 else \
2974 Py_DECREF(str);
2975
2976static PyObject*
2977string_splitlines(PyStringObject *self, PyObject *args)
2978{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002979 register int i;
2980 register int j;
2981 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002982 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002983 PyObject *list;
2984 PyObject *str;
2985 char *data;
2986
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002987 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002988 return NULL;
2989
2990 data = PyString_AS_STRING(self);
2991 len = PyString_GET_SIZE(self);
2992
Guido van Rossum4c08d552000-03-10 22:55:18 +00002993 list = PyList_New(0);
2994 if (!list)
2995 goto onError;
2996
2997 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002998 int eol;
2999
Guido van Rossum4c08d552000-03-10 22:55:18 +00003000 /* Find a line and append it */
3001 while (i < len && data[i] != '\n' && data[i] != '\r')
3002 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003003
3004 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003005 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003006 if (i < len) {
3007 if (data[i] == '\r' && i + 1 < len &&
3008 data[i+1] == '\n')
3009 i += 2;
3010 else
3011 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003012 if (keepends)
3013 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003014 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003015 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003016 j = i;
3017 }
3018 if (j < len) {
3019 SPLIT_APPEND(data, j, len);
3020 }
3021
3022 return list;
3023
3024 onError:
3025 Py_DECREF(list);
3026 return NULL;
3027}
3028
3029#undef SPLIT_APPEND
3030
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003031
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003032static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003033string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003034 /* Counterparts of the obsolete stropmodule functions; except
3035 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003036 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3037 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3038 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3039 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003040 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3041 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3042 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3043 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3044 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3045 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3046 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003047 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3048 capitalize__doc__},
3049 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3050 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3051 endswith__doc__},
3052 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3053 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3054 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3055 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3056 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3057 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3058 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3059 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3060 startswith__doc__},
3061 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3062 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3063 swapcase__doc__},
3064 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3065 translate__doc__},
3066 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3067 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3068 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3069 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3070 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3071 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3072 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3073 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3074 expandtabs__doc__},
3075 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3076 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003077 {NULL, NULL} /* sentinel */
3078};
3079
Jeremy Hylton938ace62002-07-17 16:30:39 +00003080static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003081str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3082
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003083static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003084string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003085{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003086 PyObject *x = NULL;
3087 static char *kwlist[] = {"object", 0};
3088
Guido van Rossumae960af2001-08-30 03:11:59 +00003089 if (type != &PyString_Type)
3090 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003091 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3092 return NULL;
3093 if (x == NULL)
3094 return PyString_FromString("");
3095 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003096}
3097
Guido van Rossumae960af2001-08-30 03:11:59 +00003098static PyObject *
3099str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3100{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003101 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003102 int n;
3103
3104 assert(PyType_IsSubtype(type, &PyString_Type));
3105 tmp = string_new(&PyString_Type, args, kwds);
3106 if (tmp == NULL)
3107 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003108 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003109 n = PyString_GET_SIZE(tmp);
3110 pnew = type->tp_alloc(type, n);
3111 if (pnew != NULL) {
3112 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003113 ((PyStringObject *)pnew)->ob_shash =
3114 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003115 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003116 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003117 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003118 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003119}
3120
Guido van Rossumcacfc072002-05-24 19:01:59 +00003121static PyObject *
3122basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3123{
3124 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003125 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003126 return NULL;
3127}
3128
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003129static PyObject *
3130string_mod(PyObject *v, PyObject *w)
3131{
3132 if (!PyString_Check(v)) {
3133 Py_INCREF(Py_NotImplemented);
3134 return Py_NotImplemented;
3135 }
3136 return PyString_Format(v, w);
3137}
3138
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003139PyDoc_STRVAR(basestring_doc,
3140"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003141
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003142static PyNumberMethods string_as_number = {
3143 0, /*nb_add*/
3144 0, /*nb_subtract*/
3145 0, /*nb_multiply*/
3146 0, /*nb_divide*/
3147 string_mod, /*nb_remainder*/
3148};
3149
3150
Guido van Rossumcacfc072002-05-24 19:01:59 +00003151PyTypeObject PyBaseString_Type = {
3152 PyObject_HEAD_INIT(&PyType_Type)
3153 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003154 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003155 0,
3156 0,
3157 0, /* tp_dealloc */
3158 0, /* tp_print */
3159 0, /* tp_getattr */
3160 0, /* tp_setattr */
3161 0, /* tp_compare */
3162 0, /* tp_repr */
3163 0, /* tp_as_number */
3164 0, /* tp_as_sequence */
3165 0, /* tp_as_mapping */
3166 0, /* tp_hash */
3167 0, /* tp_call */
3168 0, /* tp_str */
3169 0, /* tp_getattro */
3170 0, /* tp_setattro */
3171 0, /* tp_as_buffer */
3172 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3173 basestring_doc, /* tp_doc */
3174 0, /* tp_traverse */
3175 0, /* tp_clear */
3176 0, /* tp_richcompare */
3177 0, /* tp_weaklistoffset */
3178 0, /* tp_iter */
3179 0, /* tp_iternext */
3180 0, /* tp_methods */
3181 0, /* tp_members */
3182 0, /* tp_getset */
3183 &PyBaseObject_Type, /* tp_base */
3184 0, /* tp_dict */
3185 0, /* tp_descr_get */
3186 0, /* tp_descr_set */
3187 0, /* tp_dictoffset */
3188 0, /* tp_init */
3189 0, /* tp_alloc */
3190 basestring_new, /* tp_new */
3191 0, /* tp_free */
3192};
3193
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003194PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003195"str(object) -> string\n\
3196\n\
3197Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003198If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003200PyTypeObject PyString_Type = {
3201 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003202 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003203 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003204 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003205 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003206 (destructor)string_dealloc, /* tp_dealloc */
3207 (printfunc)string_print, /* tp_print */
3208 0, /* tp_getattr */
3209 0, /* tp_setattr */
3210 0, /* tp_compare */
3211 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003212 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003213 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003214 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003215 (hashfunc)string_hash, /* tp_hash */
3216 0, /* tp_call */
3217 (reprfunc)string_str, /* tp_str */
3218 PyObject_GenericGetAttr, /* tp_getattro */
3219 0, /* tp_setattro */
3220 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003221 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3222 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003223 string_doc, /* tp_doc */
3224 0, /* tp_traverse */
3225 0, /* tp_clear */
3226 (richcmpfunc)string_richcompare, /* tp_richcompare */
3227 0, /* tp_weaklistoffset */
3228 0, /* tp_iter */
3229 0, /* tp_iternext */
3230 string_methods, /* tp_methods */
3231 0, /* tp_members */
3232 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003233 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003234 0, /* tp_dict */
3235 0, /* tp_descr_get */
3236 0, /* tp_descr_set */
3237 0, /* tp_dictoffset */
3238 0, /* tp_init */
3239 0, /* tp_alloc */
3240 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003241 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003242};
3243
3244void
Fred Drakeba096332000-07-09 07:04:36 +00003245PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003246{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003247 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003248 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003249 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003250 if (w == NULL || !PyString_Check(*pv)) {
3251 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003252 *pv = NULL;
3253 return;
3254 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003255 v = string_concat((PyStringObject *) *pv, w);
3256 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003257 *pv = v;
3258}
3259
Guido van Rossum013142a1994-08-30 08:19:36 +00003260void
Fred Drakeba096332000-07-09 07:04:36 +00003261PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003262{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003263 PyString_Concat(pv, w);
3264 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003265}
3266
3267
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003268/* The following function breaks the notion that strings are immutable:
3269 it changes the size of a string. We get away with this only if there
3270 is only one module referencing the object. You can also think of it
3271 as creating a new string object and destroying the old one, only
3272 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003273 already be known to some other part of the code...
3274 Note that if there's not enough memory to resize the string, the original
3275 string object at *pv is deallocated, *pv is set to NULL, an "out of
3276 memory" exception is set, and -1 is returned. Else (on success) 0 is
3277 returned, and the value in *pv may or may not be the same as on input.
3278 As always, an extra byte is allocated for a trailing \0 byte (newsize
3279 does *not* include that), and a trailing \0 byte is stored.
3280*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003281
3282int
Fred Drakeba096332000-07-09 07:04:36 +00003283_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003284{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003285 register PyObject *v;
3286 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003287 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003288 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003289 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003290 Py_DECREF(v);
3291 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003292 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003293 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003294 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003295 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003296 _Py_ForgetReference(v);
3297 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003298 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003299 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003300 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003301 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003302 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003303 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003304 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003305 _Py_NewReference(*pv);
3306 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003307 sv->ob_size = newsize;
3308 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003309 return 0;
3310}
Guido van Rossume5372401993-03-16 12:15:04 +00003311
3312/* Helpers for formatstring */
3313
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003314static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003315getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003316{
3317 int argidx = *p_argidx;
3318 if (argidx < arglen) {
3319 (*p_argidx)++;
3320 if (arglen < 0)
3321 return args;
3322 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003323 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003324 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003325 PyErr_SetString(PyExc_TypeError,
3326 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003327 return NULL;
3328}
3329
/* Format codes: one bit per conversion flag character that may follow
 * the '%' of a format specifier.
 */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
3342
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003343static int
Fred Drakeba096332000-07-09 07:04:36 +00003344formatfloat(char *buf, size_t buflen, int flags,
3345 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003346{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003347 /* fmt = '%#.' + `prec` + `type`
3348 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003349 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003350 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003351 x = PyFloat_AsDouble(v);
3352 if (x == -1.0 && PyErr_Occurred()) {
3353 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003354 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003355 }
Guido van Rossume5372401993-03-16 12:15:04 +00003356 if (prec < 0)
3357 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003358 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3359 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003360 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3361 (flags&F_ALT) ? "#" : "",
3362 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003363 /* worst case length calc to ensure no buffer overrun:
3364 fmt = %#.<prec>g
3365 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003366 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003367 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3368 If prec=0 the effective precision is 1 (the leading digit is
3369 always given), therefore increase by one to 10+prec. */
3370 if (buflen <= (size_t)10 + (size_t)prec) {
3371 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003372 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003373 return -1;
3374 }
Tim Peters885d4572001-11-28 20:27:42 +00003375 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003376 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003377}
3378
Tim Peters38fd5b62000-09-21 05:43:11 +00003379/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3380 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3381 * Python's regular ints.
3382 * Return value: a new PyString*, or NULL if error.
3383 * . *pbuf is set to point into it,
3384 * *plen set to the # of chars following that.
3385 * Caller must decref it when done using pbuf.
3386 * The string starting at *pbuf is of the form
3387 * "-"? ("0x" | "0X")? digit+
3388 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003389 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003390 * There will be at least prec digits, zero-filled on the left if
3391 * necessary to get that many.
3392 * val object to be converted
3393 * flags bitmask of format flags; only F_ALT is looked at
3394 * prec minimum number of digits; 0-fill on left if needed
3395 * type a character in [duoxX]; u acts the same as d
3396 *
3397 * CAUTION: o, x and X conversions on regular ints can never
3398 * produce a '-' sign, but can for Python's unbounded ints.
3399 */
3400PyObject*
3401_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3402 char **pbuf, int *plen)
3403{
3404 PyObject *result = NULL;
3405 char *buf;
3406 int i;
3407 int sign; /* 1 if '-', else 0 */
3408 int len; /* number of characters */
3409 int numdigits; /* len == numnondigits + numdigits */
3410 int numnondigits = 0;
3411
3412 switch (type) {
3413 case 'd':
3414 case 'u':
3415 result = val->ob_type->tp_str(val);
3416 break;
3417 case 'o':
3418 result = val->ob_type->tp_as_number->nb_oct(val);
3419 break;
3420 case 'x':
3421 case 'X':
3422 numnondigits = 2;
3423 result = val->ob_type->tp_as_number->nb_hex(val);
3424 break;
3425 default:
3426 assert(!"'type' not in [duoxX]");
3427 }
3428 if (!result)
3429 return NULL;
3430
3431 /* To modify the string in-place, there can only be one reference. */
3432 if (result->ob_refcnt != 1) {
3433 PyErr_BadInternalCall();
3434 return NULL;
3435 }
3436 buf = PyString_AsString(result);
3437 len = PyString_Size(result);
3438 if (buf[len-1] == 'L') {
3439 --len;
3440 buf[len] = '\0';
3441 }
3442 sign = buf[0] == '-';
3443 numnondigits += sign;
3444 numdigits = len - numnondigits;
3445 assert(numdigits > 0);
3446
Tim Petersfff53252001-04-12 18:38:48 +00003447 /* Get rid of base marker unless F_ALT */
3448 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003449 /* Need to skip 0x, 0X or 0. */
3450 int skipped = 0;
3451 switch (type) {
3452 case 'o':
3453 assert(buf[sign] == '0');
3454 /* If 0 is only digit, leave it alone. */
3455 if (numdigits > 1) {
3456 skipped = 1;
3457 --numdigits;
3458 }
3459 break;
3460 case 'x':
3461 case 'X':
3462 assert(buf[sign] == '0');
3463 assert(buf[sign + 1] == 'x');
3464 skipped = 2;
3465 numnondigits -= 2;
3466 break;
3467 }
3468 if (skipped) {
3469 buf += skipped;
3470 len -= skipped;
3471 if (sign)
3472 buf[0] = '-';
3473 }
3474 assert(len == numnondigits + numdigits);
3475 assert(numdigits > 0);
3476 }
3477
3478 /* Fill with leading zeroes to meet minimum width. */
3479 if (prec > numdigits) {
3480 PyObject *r1 = PyString_FromStringAndSize(NULL,
3481 numnondigits + prec);
3482 char *b1;
3483 if (!r1) {
3484 Py_DECREF(result);
3485 return NULL;
3486 }
3487 b1 = PyString_AS_STRING(r1);
3488 for (i = 0; i < numnondigits; ++i)
3489 *b1++ = *buf++;
3490 for (i = 0; i < prec - numdigits; i++)
3491 *b1++ = '0';
3492 for (i = 0; i < numdigits; i++)
3493 *b1++ = *buf++;
3494 *b1 = '\0';
3495 Py_DECREF(result);
3496 result = r1;
3497 buf = PyString_AS_STRING(result);
3498 len = numnondigits + prec;
3499 }
3500
3501 /* Fix up case for hex conversions. */
3502 switch (type) {
3503 case 'x':
3504 /* Need to convert all upper case letters to lower case. */
3505 for (i = 0; i < len; i++)
3506 if (buf[i] >= 'A' && buf[i] <= 'F')
3507 buf[i] += 'a'-'A';
3508 break;
3509 case 'X':
3510 /* Need to convert 0x to 0X (and -0x to -0X). */
3511 if (buf[sign + 1] == 'x')
3512 buf[sign + 1] = 'X';
3513 break;
3514 }
3515 *pbuf = buf;
3516 *plen = len;
3517 return result;
3518}
3519
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003520static int
Fred Drakeba096332000-07-09 07:04:36 +00003521formatint(char *buf, size_t buflen, int flags,
3522 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003523{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003524 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003525 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3526 + 1 + 1 = 24 */
3527 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003528 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003529
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003530 x = PyInt_AsLong(v);
3531 if (x == -1 && PyErr_Occurred()) {
3532 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003533 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003534 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003535 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003536 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003537 "%u/%o/%x/%X of negative int will return "
3538 "a signed string in Python 2.4 and up") < 0)
3539 return -1;
3540 }
Guido van Rossume5372401993-03-16 12:15:04 +00003541 if (prec < 0)
3542 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003543
3544 if ((flags & F_ALT) &&
3545 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003546 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003547 * of issues that cause pain:
3548 * - when 0 is being converted, the C standard leaves off
3549 * the '0x' or '0X', which is inconsistent with other
3550 * %#x/%#X conversions and inconsistent with Python's
3551 * hex() function
3552 * - there are platforms that violate the standard and
3553 * convert 0 with the '0x' or '0X'
3554 * (Metrowerks, Compaq Tru64)
3555 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003556 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003557 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003558 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003559 * We can achieve the desired consistency by inserting our
3560 * own '0x' or '0X' prefix, and substituting %x/%X in place
3561 * of %#x/%#X.
3562 *
3563 * Note that this is the same approach as used in
3564 * formatint() in unicodeobject.c
3565 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003566 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003567 type, prec, type);
3568 }
3569 else {
3570 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003571 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003572 prec, type);
3573 }
3574
Tim Peters38fd5b62000-09-21 05:43:11 +00003575 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003576 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3577 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003578 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003579 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003580 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003581 return -1;
3582 }
Tim Peters885d4572001-11-28 20:27:42 +00003583 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003584 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003585}
3586
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003587static int
Fred Drakeba096332000-07-09 07:04:36 +00003588formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003589{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003590 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003591 if (PyString_Check(v)) {
3592 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003593 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003594 }
3595 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003596 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003597 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003598 }
3599 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003600 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003601}
3602
Guido van Rossum013142a1994-08-30 08:19:36 +00003603
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which a single
   float, int or char is formatted.  XXX This is a magic number.  Every
   formatting routine bounds-checks against it so that nothing overflows,
   but allocating a buffer of the appropriate size for each format might
   be the better solution.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003613
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003614PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003615PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003616{
3617 char *fmt, *res;
3618 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003619 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003620 PyObject *result, *orig_args;
3621#ifdef Py_USING_UNICODE
3622 PyObject *v, *w;
3623#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003624 PyObject *dict = NULL;
3625 if (format == NULL || !PyString_Check(format) || args == NULL) {
3626 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003627 return NULL;
3628 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003629 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003630 fmt = PyString_AS_STRING(format);
3631 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003632 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003633 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003634 if (result == NULL)
3635 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003636 res = PyString_AsString(result);
3637 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003638 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003639 argidx = 0;
3640 }
3641 else {
3642 arglen = -1;
3643 argidx = -2;
3644 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003645 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3646 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003647 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003648 while (--fmtcnt >= 0) {
3649 if (*fmt != '%') {
3650 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003651 rescnt = fmtcnt + 100;
3652 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003653 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003654 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003655 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003656 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003657 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003658 }
3659 *res++ = *fmt++;
3660 }
3661 else {
3662 /* Got a format specifier */
3663 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003664 int width = -1;
3665 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003666 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003667 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003668 PyObject *v = NULL;
3669 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003670 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003671 int sign;
3672 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003673 char formatbuf[FORMATBUFLEN];
3674 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003675#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003676 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003677 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003678#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003679
Guido van Rossumda9c2711996-12-05 21:58:58 +00003680 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003681 if (*fmt == '(') {
3682 char *keystart;
3683 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003684 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003685 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003686
3687 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003688 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003689 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003690 goto error;
3691 }
3692 ++fmt;
3693 --fmtcnt;
3694 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003695 /* Skip over balanced parentheses */
3696 while (pcount > 0 && --fmtcnt >= 0) {
3697 if (*fmt == ')')
3698 --pcount;
3699 else if (*fmt == '(')
3700 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003701 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003702 }
3703 keylen = fmt - keystart - 1;
3704 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003705 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003706 "incomplete format key");
3707 goto error;
3708 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003709 key = PyString_FromStringAndSize(keystart,
3710 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003711 if (key == NULL)
3712 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003713 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003714 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003715 args_owned = 0;
3716 }
3717 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003718 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003719 if (args == NULL) {
3720 goto error;
3721 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003722 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003723 arglen = -1;
3724 argidx = -2;
3725 }
Guido van Rossume5372401993-03-16 12:15:04 +00003726 while (--fmtcnt >= 0) {
3727 switch (c = *fmt++) {
3728 case '-': flags |= F_LJUST; continue;
3729 case '+': flags |= F_SIGN; continue;
3730 case ' ': flags |= F_BLANK; continue;
3731 case '#': flags |= F_ALT; continue;
3732 case '0': flags |= F_ZERO; continue;
3733 }
3734 break;
3735 }
3736 if (c == '*') {
3737 v = getnextarg(args, arglen, &argidx);
3738 if (v == NULL)
3739 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003740 if (!PyInt_Check(v)) {
3741 PyErr_SetString(PyExc_TypeError,
3742 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003743 goto error;
3744 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003745 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003746 if (width < 0) {
3747 flags |= F_LJUST;
3748 width = -width;
3749 }
Guido van Rossume5372401993-03-16 12:15:04 +00003750 if (--fmtcnt >= 0)
3751 c = *fmt++;
3752 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003753 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003754 width = c - '0';
3755 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003756 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003757 if (!isdigit(c))
3758 break;
3759 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003760 PyErr_SetString(
3761 PyExc_ValueError,
3762 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003763 goto error;
3764 }
3765 width = width*10 + (c - '0');
3766 }
3767 }
3768 if (c == '.') {
3769 prec = 0;
3770 if (--fmtcnt >= 0)
3771 c = *fmt++;
3772 if (c == '*') {
3773 v = getnextarg(args, arglen, &argidx);
3774 if (v == NULL)
3775 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003776 if (!PyInt_Check(v)) {
3777 PyErr_SetString(
3778 PyExc_TypeError,
3779 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003780 goto error;
3781 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003782 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003783 if (prec < 0)
3784 prec = 0;
3785 if (--fmtcnt >= 0)
3786 c = *fmt++;
3787 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003788 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003789 prec = c - '0';
3790 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003791 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003792 if (!isdigit(c))
3793 break;
3794 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003795 PyErr_SetString(
3796 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003797 "prec too big");
3798 goto error;
3799 }
3800 prec = prec*10 + (c - '0');
3801 }
3802 }
3803 } /* prec */
3804 if (fmtcnt >= 0) {
3805 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003806 if (--fmtcnt >= 0)
3807 c = *fmt++;
3808 }
3809 }
3810 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003811 PyErr_SetString(PyExc_ValueError,
3812 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003813 goto error;
3814 }
3815 if (c != '%') {
3816 v = getnextarg(args, arglen, &argidx);
3817 if (v == NULL)
3818 goto error;
3819 }
3820 sign = 0;
3821 fill = ' ';
3822 switch (c) {
3823 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003824 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003825 len = 1;
3826 break;
3827 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003828#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003829 if (PyUnicode_Check(v)) {
3830 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003831 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003832 goto unicode;
3833 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003834#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003835 /* Fall through */
3836 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003837 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003838 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003839 else
3840 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003841 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003842 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003843 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00003844 /* XXX Note: this should never happen,
3845 since PyObject_Repr() and
3846 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003847 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00003848 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003849 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003850 goto error;
3851 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003852 pbuf = PyString_AS_STRING(temp);
3853 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003854 if (prec >= 0 && len > prec)
3855 len = prec;
3856 break;
3857 case 'i':
3858 case 'd':
3859 case 'u':
3860 case 'o':
3861 case 'x':
3862 case 'X':
3863 if (c == 'i')
3864 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003865 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003866 temp = _PyString_FormatLong(v, flags,
3867 prec, c, &pbuf, &len);
3868 if (!temp)
3869 goto error;
3870 /* unbounded ints can always produce
3871 a sign character! */
3872 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003873 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003874 else {
3875 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003876 len = formatint(pbuf,
3877 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003878 flags, prec, c, v);
3879 if (len < 0)
3880 goto error;
3881 /* only d conversion is signed */
3882 sign = c == 'd';
3883 }
3884 if (flags & F_ZERO)
3885 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003886 break;
3887 case 'e':
3888 case 'E':
3889 case 'f':
3890 case 'g':
3891 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003892 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003893 len = formatfloat(pbuf, sizeof(formatbuf),
3894 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003895 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003896 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003897 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003898 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003899 fill = '0';
3900 break;
3901 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003902 pbuf = formatbuf;
3903 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003904 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003905 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003906 break;
3907 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003908 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003909 "unsupported format character '%c' (0x%x) "
3910 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003911 c, c,
3912 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003913 goto error;
3914 }
3915 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003916 if (*pbuf == '-' || *pbuf == '+') {
3917 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003918 len--;
3919 }
3920 else if (flags & F_SIGN)
3921 sign = '+';
3922 else if (flags & F_BLANK)
3923 sign = ' ';
3924 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003925 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003926 }
3927 if (width < len)
3928 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003929 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003930 reslen -= rescnt;
3931 rescnt = width + fmtcnt + 100;
3932 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003933 if (reslen < 0) {
3934 Py_DECREF(result);
3935 return PyErr_NoMemory();
3936 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003937 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003938 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003939 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003940 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003941 }
3942 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003943 if (fill != ' ')
3944 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003945 rescnt--;
3946 if (width > len)
3947 width--;
3948 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003949 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3950 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003951 assert(pbuf[1] == c);
3952 if (fill != ' ') {
3953 *res++ = *pbuf++;
3954 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003955 }
Tim Petersfff53252001-04-12 18:38:48 +00003956 rescnt -= 2;
3957 width -= 2;
3958 if (width < 0)
3959 width = 0;
3960 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003961 }
3962 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003963 do {
3964 --rescnt;
3965 *res++ = fill;
3966 } while (--width > len);
3967 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003968 if (fill == ' ') {
3969 if (sign)
3970 *res++ = sign;
3971 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003972 (c == 'x' || c == 'X')) {
3973 assert(pbuf[0] == '0');
3974 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003975 *res++ = *pbuf++;
3976 *res++ = *pbuf++;
3977 }
3978 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003979 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003980 res += len;
3981 rescnt -= len;
3982 while (--width >= len) {
3983 --rescnt;
3984 *res++ = ' ';
3985 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003986 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003987 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003988 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003989 goto error;
3990 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003992 } /* '%' */
3993 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003994 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003995 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003996 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003997 goto error;
3998 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003999 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004000 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004001 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004002 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004003 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004004
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004005#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004006 unicode:
4007 if (args_owned) {
4008 Py_DECREF(args);
4009 args_owned = 0;
4010 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004011 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004012 if (PyTuple_Check(orig_args) && argidx > 0) {
4013 PyObject *v;
4014 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4015 v = PyTuple_New(n);
4016 if (v == NULL)
4017 goto error;
4018 while (--n >= 0) {
4019 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4020 Py_INCREF(w);
4021 PyTuple_SET_ITEM(v, n, w);
4022 }
4023 args = v;
4024 } else {
4025 Py_INCREF(orig_args);
4026 args = orig_args;
4027 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004028 args_owned = 1;
4029 /* Take what we have of the result and let the Unicode formatting
4030 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004031 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004032 if (_PyString_Resize(&result, rescnt))
4033 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004034 fmtcnt = PyString_GET_SIZE(format) - \
4035 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004036 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4037 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004038 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004039 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004040 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004041 if (v == NULL)
4042 goto error;
4043 /* Paste what we have (result) to what the Unicode formatting
4044 function returned (v) and return the result (or error) */
4045 w = PyUnicode_Concat(result, v);
4046 Py_DECREF(result);
4047 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004048 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004049 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004050#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004051
Guido van Rossume5372401993-03-16 12:15:04 +00004052 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004053 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004054 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004055 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004056 }
Guido van Rossume5372401993-03-16 12:15:04 +00004057 return NULL;
4058}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004059
Guido van Rossum2a61e741997-01-18 07:55:05 +00004060void
Fred Drakeba096332000-07-09 07:04:36 +00004061PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004062{
4063 register PyStringObject *s = (PyStringObject *)(*p);
4064 PyObject *t;
4065 if (s == NULL || !PyString_Check(s))
4066 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004067 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004068 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004069 if (interned == NULL) {
4070 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004071 if (interned == NULL) {
4072 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004073 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004074 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004075 }
4076 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4077 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004078 Py_DECREF(*p);
4079 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004080 return;
4081 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004082 /* Ensure that only true string objects appear in the intern dict */
4083 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004084 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4085 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004086 if (t == NULL) {
4087 PyErr_Clear();
4088 return;
Tim Peters111f6092001-09-12 07:54:51 +00004089 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004090 } else {
4091 t = (PyObject*) s;
4092 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004093 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004094
4095 if (PyDict_SetItem(interned, t, t) == 0) {
4096 /* The two references in interned are not counted by
4097 refcnt. The string deallocator will take care of this */
4098 ((PyObject *)t)->ob_refcnt-=2;
4099 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4100 Py_DECREF(*p);
4101 *p = t;
4102 return;
4103 }
4104 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004105 PyErr_Clear();
4106}
4107
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004108void
4109PyString_InternImmortal(PyObject **p)
4110{
4111 PyString_InternInPlace(p);
4112 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4113 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4114 Py_INCREF(*p);
4115 }
4116}
4117
Guido van Rossum2a61e741997-01-18 07:55:05 +00004118
4119PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004120PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004121{
4122 PyObject *s = PyString_FromString(cp);
4123 if (s == NULL)
4124 return NULL;
4125 PyString_InternInPlace(&s);
4126 return s;
4127}
4128
Guido van Rossum8cf04761997-08-02 02:57:45 +00004129void
Fred Drakeba096332000-07-09 07:04:36 +00004130PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004131{
4132 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004133 for (i = 0; i < UCHAR_MAX + 1; i++) {
4134 Py_XDECREF(characters[i]);
4135 characters[i] = NULL;
4136 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004137 Py_XDECREF(nullstring);
4138 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004139}
Barry Warsawa903ad982001-02-23 16:40:48 +00004140
Barry Warsawa903ad982001-02-23 16:40:48 +00004141void _Py_ReleaseInternedStrings(void)
4142{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004143 PyObject *keys;
4144 PyStringObject *s;
4145 int i, n;
4146
4147 if (interned == NULL || !PyDict_Check(interned))
4148 return;
4149 keys = PyDict_Keys(interned);
4150 if (keys == NULL || !PyList_Check(keys)) {
4151 PyErr_Clear();
4152 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004153 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004154
4155 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4156 detector, interned strings are not forcibly deallocated; rather, we
4157 give them their stolen references back, and then clear and DECREF
4158 the interned dict. */
4159
4160 fprintf(stderr, "releasing interned strings\n");
4161 n = PyList_GET_SIZE(keys);
4162 for (i = 0; i < n; i++) {
4163 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4164 switch (s->ob_sstate) {
4165 case SSTATE_NOT_INTERNED:
4166 /* XXX Shouldn't happen */
4167 break;
4168 case SSTATE_INTERNED_IMMORTAL:
4169 s->ob_refcnt += 1;
4170 break;
4171 case SSTATE_INTERNED_MORTAL:
4172 s->ob_refcnt += 2;
4173 break;
4174 default:
4175 Py_FatalError("Inconsistent interned string state.");
4176 }
4177 s->ob_sstate = SSTATE_NOT_INTERNED;
4178 }
4179 Py_DECREF(keys);
4180 PyDict_Clear(interned);
4181 Py_DECREF(interned);
4182 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004183}