blob: f18edb00c46d914dda640278ee60c7b9c8243c21 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
   For PyString_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000544 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545#ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
556
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
562
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
572 }
573#else
574 *p++ = *s++;
575#endif
576 continue;
577 }
578 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
583 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
604 }
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
647#ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "Unicode escapes not legal "
654 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#endif
658 default:
659 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
671}
672
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000673static int
674string_getsize(register PyObject *op)
675{
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
681}
682
683static /*const*/ char *
684string_getbuffer(register PyObject *op)
685{
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
691}
692
Guido van Rossumd7047b31995-01-02 19:07:15 +0000693int
Fred Drakeba096332000-07-09 07:04:36 +0000694PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (!PyString_Check(op))
697 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699}
700
701/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709int
710PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
713{
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
717 }
718
719 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
725 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000726 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000727#endif
728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747/* Methods */
748
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000749static int
Fred Drakeba096332000-07-09 07:04:36 +0000750string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
752 int i;
753 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000754 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000755
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000756 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
766 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000768#ifdef __VMS
769 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
770#else
771 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
772#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775
Thomas Wouters7e474022000-07-16 12:04:32 +0000776 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000777 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000778 if (memchr(op->ob_sval, '\'', op->ob_size) &&
779 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 quote = '"';
781
782 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 for (i = 0; i < op->ob_size; i++) {
784 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000789 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000791 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fprintf(fp, "\\r");
793 else if (c < ' ' || c >= 0x7f)
794 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000795 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000798 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000802PyObject *
803PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000805 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000806 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
807 PyObject *v;
808 if (newsize > INT_MAX) {
809 PyErr_SetString(PyExc_OverflowError,
810 "string is too large to make repr");
811 }
812 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000814 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 }
816 else {
817 register int i;
818 register char c;
819 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 int quote;
821
Thomas Wouters7e474022000-07-16 12:04:32 +0000822 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000824 if (smartquotes &&
825 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000826 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000827 quote = '"';
828
Tim Peters9161c8b2001-12-03 01:55:38 +0000829 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
839 *p++ = '\\', *p++ = 't';
840 else if (c == '\n')
841 *p++ = '\\', *p++ = 'n';
842 else if (c == '\r')
843 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 else if (c < ' ' || c >= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
847 function call). */
848 sprintf(p, "\\x%02x", c & 0xff);
849 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000850 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000851 else
852 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000857 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000858 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000859 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000860 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864string_repr(PyObject *op)
865{
866 return PyString_Repr(op, 1);
867}
868
869static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000870string_str(PyObject *s)
871{
Tim Petersc9933152001-10-16 20:18:24 +0000872 assert(PyString_Check(s));
873 if (PyString_CheckExact(s)) {
874 Py_INCREF(s);
875 return s;
876 }
877 else {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject *t = (PyStringObject *) s;
880 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
881 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000882}
883
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884static int
Fred Drakeba096332000-07-09 07:04:36 +0000885string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 return a->ob_size;
888}
889
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000891string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892{
893 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 register PyStringObject *op;
895 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000896#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000897 if (PyUnicode_Check(bb))
898 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000899#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000900 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000901 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000902 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 return NULL;
904 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000907 if ((a->ob_size == 0 || b->ob_size == 0) &&
908 PyString_CheckExact(a) && PyString_CheckExact(b)) {
909 if (a->ob_size == 0) {
910 Py_INCREF(bb);
911 return bb;
912 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 Py_INCREF(a);
914 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
916 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000917 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000919 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000920 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000922 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000924 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000925 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
926 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
927 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929#undef b
930}
931
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000933string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
935 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000936 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000937 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000939 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 if (n < 0)
941 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000942 /* watch out for overflows: the size can overflow int,
943 * and the # of bytes needed can overflow size_t
944 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000946 if (n && size / n != a->ob_size) {
947 PyErr_SetString(PyExc_OverflowError,
948 "repeated string is too long");
949 return NULL;
950 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000951 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
Tim Peters8f422462000-09-09 06:13:41 +0000955 nbytes = size * sizeof(char);
956 if (nbytes / sizeof(char) != (size_t)size ||
957 nbytes + sizeof(PyStringObject) <= nbytes) {
958 PyErr_SetString(PyExc_OverflowError,
959 "repeated string is too long");
960 return NULL;
961 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000963 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000969 op->ob_sval[size] = '\0';
970 if (a->ob_size == 1 && n > 0) {
971 memset(op->ob_sval, a->ob_sval[0] , n);
972 return (PyObject *) op;
973 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000974 i = 0;
975 if (i < size) {
976 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
977 i = (int) a->ob_size;
978 }
979 while (i < size) {
980 j = (i <= size-i) ? i : size-i;
981 memcpy(op->ob_sval+i, op->ob_sval, j);
982 i += j;
983 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985}
986
987/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
988
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000990string_slice(register PyStringObject *a, register int i, register int j)
991 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992{
993 if (i < 0)
994 i = 0;
995 if (j < 0)
996 j = 0; /* Avoid signed/unsigned bug in next line */
997 if (j > a->ob_size)
998 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000999 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1000 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001001 Py_INCREF(a);
1002 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 }
1004 if (j < i)
1005 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007}
1008
Guido van Rossum9284a572000-03-07 15:53:43 +00001009static int
Fred Drakeba096332000-07-09 07:04:36 +00001010string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001011{
Barry Warsaw817918c2002-08-06 16:58:21 +00001012 const char *lhs, *rhs, *end;
1013 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014
1015 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (PyUnicode_Check(el))
1018 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001019#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001020 if (!PyString_Check(el)) {
1021 PyErr_SetString(PyExc_TypeError,
1022 "'in <string>' requires string as left operand");
1023 return -1;
1024 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001025 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 rhs = PyString_AS_STRING(el);
1028 lhs = PyString_AS_STRING(a);
1029
1030 /* optimize for a single character */
1031 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001032 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001033
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001034 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001035 while (lhs <= end) {
1036 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001037 return 1;
1038 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001039
Guido van Rossum9284a572000-03-07 15:53:43 +00001040 return 0;
1041}
1042
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001044string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001047 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050 return NULL;
1051 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001052 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001053 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001054 if (v == NULL)
1055 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001056 else {
1057#ifdef COUNT_ALLOCS
1058 one_strings++;
1059#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001060 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001061 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001062 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Martin v. Löwiscd353062001-05-24 16:56:35 +00001065static PyObject*
1066string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001068 int c;
1069 int len_a, len_b;
1070 int min_len;
1071 PyObject *result;
1072
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001073 /* Make sure both arguments are strings. */
1074 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001075 result = Py_NotImplemented;
1076 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001077 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001078 if (a == b) {
1079 switch (op) {
1080 case Py_EQ:case Py_LE:case Py_GE:
1081 result = Py_True;
1082 goto out;
1083 case Py_NE:case Py_LT:case Py_GT:
1084 result = Py_False;
1085 goto out;
1086 }
1087 }
1088 if (op == Py_EQ) {
1089 /* Supporting Py_NE here as well does not save
1090 much time, since Py_NE is rarely used. */
1091 if (a->ob_size == b->ob_size
1092 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001093 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094 a->ob_size) == 0)) {
1095 result = Py_True;
1096 } else {
1097 result = Py_False;
1098 }
1099 goto out;
1100 }
1101 len_a = a->ob_size; len_b = b->ob_size;
1102 min_len = (len_a < len_b) ? len_a : len_b;
1103 if (min_len > 0) {
1104 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1105 if (c==0)
1106 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1107 }else
1108 c = 0;
1109 if (c == 0)
1110 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1111 switch (op) {
1112 case Py_LT: c = c < 0; break;
1113 case Py_LE: c = c <= 0; break;
1114 case Py_EQ: assert(0); break; /* unreachable */
1115 case Py_NE: c = c != 0; break;
1116 case Py_GT: c = c > 0; break;
1117 case Py_GE: c = c >= 0; break;
1118 default:
1119 result = Py_NotImplemented;
1120 goto out;
1121 }
1122 result = c ? Py_True : Py_False;
1123 out:
1124 Py_INCREF(result);
1125 return result;
1126}
1127
1128int
1129_PyString_Eq(PyObject *o1, PyObject *o2)
1130{
1131 PyStringObject *a, *b;
1132 a = (PyStringObject*)o1;
1133 b = (PyStringObject*)o2;
1134 return a->ob_size == b->ob_size
1135 && *a->ob_sval == *b->ob_sval
1136 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001137}
1138
Guido van Rossum9bfef441993-03-29 10:43:31 +00001139static long
Fred Drakeba096332000-07-09 07:04:36 +00001140string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001141{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 register int len;
1143 register unsigned char *p;
1144 register long x;
1145
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001146 if (a->ob_shash != -1)
1147 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 len = a->ob_size;
1149 p = (unsigned char *) a->ob_sval;
1150 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001151 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001152 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001153 x ^= a->ob_size;
1154 if (x == -1)
1155 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001156 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 return x;
1158}
1159
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001160static PyObject*
1161string_subscript(PyStringObject* self, PyObject* item)
1162{
1163 if (PyInt_Check(item)) {
1164 long i = PyInt_AS_LONG(item);
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PyLong_Check(item)) {
1170 long i = PyLong_AsLong(item);
1171 if (i == -1 && PyErr_Occurred())
1172 return NULL;
1173 if (i < 0)
1174 i += PyString_GET_SIZE(self);
1175 return string_item(self,i);
1176 }
1177 else if (PySlice_Check(item)) {
1178 int start, stop, step, slicelength, cur, i;
1179 char* source_buf;
1180 char* result_buf;
1181 PyObject* result;
1182
1183 if (PySlice_GetIndicesEx((PySliceObject*)item,
1184 PyString_GET_SIZE(self),
1185 &start, &stop, &step, &slicelength) < 0) {
1186 return NULL;
1187 }
1188
1189 if (slicelength <= 0) {
1190 return PyString_FromStringAndSize("", 0);
1191 }
1192 else {
1193 source_buf = PyString_AsString((PyObject*)self);
1194 result_buf = PyMem_Malloc(slicelength);
1195
1196 for (cur = start, i = 0; i < slicelength;
1197 cur += step, i++) {
1198 result_buf[i] = source_buf[cur];
1199 }
1200
1201 result = PyString_FromStringAndSize(result_buf,
1202 slicelength);
1203 PyMem_Free(result_buf);
1204 return result;
1205 }
1206 }
1207 else {
1208 PyErr_SetString(PyExc_TypeError,
1209 "string indices must be integers");
1210 return NULL;
1211 }
1212}
1213
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001214static int
Fred Drakeba096332000-07-09 07:04:36 +00001215string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216{
1217 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001218 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001219 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220 return -1;
1221 }
1222 *ptr = (void *)self->ob_sval;
1223 return self->ob_size;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
Guido van Rossum045e6881997-09-08 18:30:11 +00001229 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001230 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001231 return -1;
1232}
1233
1234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236{
1237 if ( lenp )
1238 *lenp = self->ob_size;
1239 return 1;
1240}
1241
Guido van Rossum1db70701998-10-08 02:18:52 +00001242static int
Fred Drakeba096332000-07-09 07:04:36 +00001243string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001244{
1245 if ( index != 0 ) {
1246 PyErr_SetString(PyExc_SystemError,
1247 "accessing non-existent string segment");
1248 return -1;
1249 }
1250 *ptr = self->ob_sval;
1251 return self->ob_size;
1252}
1253
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001254static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001255 (inquiry)string_length, /*sq_length*/
1256 (binaryfunc)string_concat, /*sq_concat*/
1257 (intargfunc)string_repeat, /*sq_repeat*/
1258 (intargfunc)string_item, /*sq_item*/
1259 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001260 0, /*sq_ass_item*/
1261 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001262 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001263};
1264
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001265static PyMappingMethods string_as_mapping = {
1266 (inquiry)string_length,
1267 (binaryfunc)string_subscript,
1268 0,
1269};
1270
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271static PyBufferProcs string_as_buffer = {
1272 (getreadbufferproc)string_buffer_getreadbuf,
1273 (getwritebufferproc)string_buffer_getwritebuf,
1274 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001275 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001276};
1277
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278
1279
/* Strip kinds; each value is also an index into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip kinds above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip kind i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001288
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289
1290static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001291split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001294 PyObject* item;
1295 PyObject *list = PyList_New(0);
1296
1297 if (list == NULL)
1298 return NULL;
1299
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 for (i = j = 0; i < len; ) {
1301 while (i < len && isspace(Py_CHARMASK(s[i])))
1302 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 while (i < len && !isspace(Py_CHARMASK(s[i])))
1305 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 if (maxsplit-- <= 0)
1308 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1310 if (item == NULL)
1311 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312 err = PyList_Append(list, item);
1313 Py_DECREF(item);
1314 if (err < 0)
1315 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
1318 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319 }
1320 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321 if (j < len) {
1322 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1323 if (item == NULL)
1324 goto finally;
1325 err = PyList_Append(list, item);
1326 Py_DECREF(item);
1327 if (err < 0)
1328 goto finally;
1329 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return list;
1331 finally:
1332 Py_DECREF(list);
1333 return NULL;
1334}
1335
1336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001337PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338"S.split([sep [,maxsplit]]) -> list of strings\n\
1339\n\
1340Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001342splits are done. If sep is not specified or is None, any\n\
1343whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344
1345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001346string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347{
1348 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 int maxsplit = -1;
1350 const char *s = PyString_AS_STRING(self), *sub;
1351 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 if (maxsplit < 0)
1356 maxsplit = INT_MAX;
1357 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (PyString_Check(subobj)) {
1360 sub = PyString_AS_STRING(subobj);
1361 n = PyString_GET_SIZE(subobj);
1362 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001363#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001364 else if (PyUnicode_Check(subobj))
1365 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001366#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1368 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369 if (n == 0) {
1370 PyErr_SetString(PyExc_ValueError, "empty separator");
1371 return NULL;
1372 }
1373
1374 list = PyList_New(0);
1375 if (list == NULL)
1376 return NULL;
1377
1378 i = j = 0;
1379 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001380 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001381 if (maxsplit-- <= 0)
1382 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1384 if (item == NULL)
1385 goto fail;
1386 err = PyList_Append(list, item);
1387 Py_DECREF(item);
1388 if (err < 0)
1389 goto fail;
1390 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 }
1392 else
1393 i++;
1394 }
1395 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1396 if (item == NULL)
1397 goto fail;
1398 err = PyList_Append(list, item);
1399 Py_DECREF(item);
1400 if (err < 0)
1401 goto fail;
1402
1403 return list;
1404
1405 fail:
1406 Py_DECREF(list);
1407 return NULL;
1408}
1409
1410
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001411PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412"S.join(sequence) -> string\n\
1413\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001414Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001415sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416
1417static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001418string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419{
1420 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001421 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 char *p;
1424 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001425 size_t sz = 0;
1426 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001427 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428
Tim Peters19fe14e2001-01-19 03:03:47 +00001429 seq = PySequence_Fast(orig, "");
1430 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001431 if (PyErr_ExceptionMatches(PyExc_TypeError))
1432 PyErr_Format(PyExc_TypeError,
1433 "sequence expected, %.80s found",
1434 orig->ob_type->tp_name);
1435 return NULL;
1436 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001437
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001438 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001439 if (seqlen == 0) {
1440 Py_DECREF(seq);
1441 return PyString_FromString("");
1442 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001445 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1446 PyErr_Format(PyExc_TypeError,
1447 "sequence item 0: expected string,"
1448 " %.80s found",
1449 item->ob_type->tp_name);
1450 Py_DECREF(seq);
1451 return NULL;
1452 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001453 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001454 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001457
Tim Peters19fe14e2001-01-19 03:03:47 +00001458 /* There are at least two things to join. Do a pre-pass to figure out
1459 * the total amount of space we'll need (sz), see whether any argument
1460 * is absurd, and defer to the Unicode join if appropriate.
1461 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001464 item = PySequence_Fast_GET_ITEM(seq, i);
1465 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001466#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001467 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001468 /* Defer to Unicode join.
1469 * CAUTION: There's no gurantee that the
1470 * original sequence can be iterated over
1471 * again, so we must pass seq here.
1472 */
1473 PyObject *result;
1474 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001475 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001476 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001478#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001479 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001480 "sequence item %i: expected string,"
1481 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001482 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001483 Py_DECREF(seq);
1484 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001485 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001486 sz += PyString_GET_SIZE(item);
1487 if (i != 0)
1488 sz += seplen;
1489 if (sz < old_sz || sz > INT_MAX) {
1490 PyErr_SetString(PyExc_OverflowError,
1491 "join() is too long for a Python string");
1492 Py_DECREF(seq);
1493 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001495 }
1496
1497 /* Allocate result space. */
1498 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1499 if (res == NULL) {
1500 Py_DECREF(seq);
1501 return NULL;
1502 }
1503
1504 /* Catenate everything. */
1505 p = PyString_AS_STRING(res);
1506 for (i = 0; i < seqlen; ++i) {
1507 size_t n;
1508 item = PySequence_Fast_GET_ITEM(seq, i);
1509 n = PyString_GET_SIZE(item);
1510 memcpy(p, PyString_AS_STRING(item), n);
1511 p += n;
1512 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001513 memcpy(p, sep, seplen);
1514 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001515 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001517
Jeremy Hylton49048292000-07-11 03:28:17 +00001518 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520}
1521
Tim Peters52e155e2001-06-16 05:42:57 +00001522PyObject *
1523_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001524{
Tim Petersa7259592001-06-16 05:11:17 +00001525 assert(sep != NULL && PyString_Check(sep));
1526 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001527 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001528}
1529
/* Normalise a (start, end) pair in place for a sequence of length len.

   end:   values above len become len; negative values have len added and
          are then clamped to 0.
   start: negative values have len added and are then clamped to 0;
          values above len are left unchanged. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int hi, lo;

    hi = *end;
    if (hi > len)
        hi = len;
    else if (hi < 0)
        hi += len;
    *end = (hi < 0) ? 0 : hi;

    lo = *start;
    if (lo < 0) {
        lo += len;
        if (lo < 0)
            lo = 0;
    }
    *start = lo;
}
1544
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545static long
Fred Drakeba096332000-07-09 07:04:36 +00001546string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001548 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549 int len = PyString_GET_SIZE(self);
1550 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001553 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001554 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 return -2;
1556 if (PyString_Check(subobj)) {
1557 sub = PyString_AS_STRING(subobj);
1558 n = PyString_GET_SIZE(subobj);
1559 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001560#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001562 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001563#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565 return -2;
1566
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001567 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 if (dir > 0) {
1570 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 last -= n;
1573 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001574 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 return (long)i;
1576 }
1577 else {
1578 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001579
Guido van Rossum4c08d552000-03-10 22:55:18 +00001580 if (n == 0 && i <= last)
1581 return (long)last;
1582 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001583 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 return (long)j;
1585 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001586
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 return -1;
1588}
1589
1590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592"S.find(sub [,start [,end]]) -> int\n\
1593\n\
1594Return the lowest index in S where substring sub is found,\n\
1595such that sub is contained within s[start,end]. Optional\n\
1596arguments start and end are interpreted as in slice notation.\n\
1597\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001598Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599
1600static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001601string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001603 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604 if (result == -2)
1605 return NULL;
1606 return PyInt_FromLong(result);
1607}
1608
1609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611"S.index(sub [,start [,end]]) -> int\n\
1612\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001613Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614
1615static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001616string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001618 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (result == -2)
1620 return NULL;
1621 if (result == -1) {
1622 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001623 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 return NULL;
1625 }
1626 return PyInt_FromLong(result);
1627}
1628
1629
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001630PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631"S.rfind(sub [,start [,end]]) -> int\n\
1632\n\
1633Return the highest index in S where substring sub is found,\n\
1634such that sub is contained within s[start,end]. Optional\n\
1635arguments start and end are interpreted as in slice notation.\n\
1636\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001637Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638
1639static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001640string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001642 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643 if (result == -2)
1644 return NULL;
1645 return PyInt_FromLong(result);
1646}
1647
1648
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001649PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650"S.rindex(sub [,start [,end]]) -> int\n\
1651\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001652Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653
1654static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001655string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001657 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 if (result == -2)
1659 return NULL;
1660 if (result == -1) {
1661 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001662 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663 return NULL;
1664 }
1665 return PyInt_FromLong(result);
1666}
1667
1668
1669static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001670do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1671{
1672 char *s = PyString_AS_STRING(self);
1673 int len = PyString_GET_SIZE(self);
1674 char *sep = PyString_AS_STRING(sepobj);
1675 int seplen = PyString_GET_SIZE(sepobj);
1676 int i, j;
1677
1678 i = 0;
1679 if (striptype != RIGHTSTRIP) {
1680 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1681 i++;
1682 }
1683 }
1684
1685 j = len;
1686 if (striptype != LEFTSTRIP) {
1687 do {
1688 j--;
1689 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1690 j++;
1691 }
1692
1693 if (i == 0 && j == len && PyString_CheckExact(self)) {
1694 Py_INCREF(self);
1695 return (PyObject*)self;
1696 }
1697 else
1698 return PyString_FromStringAndSize(s+i, j-i);
1699}
1700
1701
1702static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001703do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704{
1705 char *s = PyString_AS_STRING(self);
1706 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001708 i = 0;
1709 if (striptype != RIGHTSTRIP) {
1710 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1711 i++;
1712 }
1713 }
1714
1715 j = len;
1716 if (striptype != LEFTSTRIP) {
1717 do {
1718 j--;
1719 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1720 j++;
1721 }
1722
Tim Peters8fa5dd02001-09-12 02:18:30 +00001723 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 Py_INCREF(self);
1725 return (PyObject*)self;
1726 }
1727 else
1728 return PyString_FromStringAndSize(s+i, j-i);
1729}
1730
1731
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001732static PyObject *
1733do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1734{
1735 PyObject *sep = NULL;
1736
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001737 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001738 return NULL;
1739
1740 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001741 if (PyString_Check(sep))
1742 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001743#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001744 else if (PyUnicode_Check(sep)) {
1745 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1746 PyObject *res;
1747 if (uniself==NULL)
1748 return NULL;
1749 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1750 striptype, sep);
1751 Py_DECREF(uniself);
1752 return res;
1753 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001754#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001755 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001756 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001757#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001758 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001759#else
1760 "%s arg must be None or str",
1761#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001762 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001763 return NULL;
1764 }
1765 return do_xstrip(self, striptype, sep);
1766 }
1767
1768 return do_strip(self, striptype);
1769}
1770
1771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001772PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001773"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774\n\
1775Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001776whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001777If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001778If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779
1780static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001781string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001783 if (PyTuple_GET_SIZE(args) == 0)
1784 return do_strip(self, BOTHSTRIP); /* Common case */
1785 else
1786 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001791"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001793Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001794If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001798string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001800 if (PyTuple_GET_SIZE(args) == 0)
1801 return do_strip(self, LEFTSTRIP); /* Common case */
1802 else
1803 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804}
1805
1806
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001807PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001808"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001810Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001811If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001812If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813
1814static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001815string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001817 if (PyTuple_GET_SIZE(args) == 0)
1818 return do_strip(self, RIGHTSTRIP); /* Common case */
1819 else
1820 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821}
1822
1823
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001824PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825"S.lower() -> string\n\
1826\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001827Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828
1829static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001830string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831{
1832 char *s = PyString_AS_STRING(self), *s_new;
1833 int i, n = PyString_GET_SIZE(self);
1834 PyObject *new;
1835
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836 new = PyString_FromStringAndSize(NULL, n);
1837 if (new == NULL)
1838 return NULL;
1839 s_new = PyString_AsString(new);
1840 for (i = 0; i < n; i++) {
1841 int c = Py_CHARMASK(*s++);
1842 if (isupper(c)) {
1843 *s_new = tolower(c);
1844 } else
1845 *s_new = c;
1846 s_new++;
1847 }
1848 return new;
1849}
1850
1851
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001852PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853"S.upper() -> string\n\
1854\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001855Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856
1857static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001858string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001859{
1860 char *s = PyString_AS_STRING(self), *s_new;
1861 int i, n = PyString_GET_SIZE(self);
1862 PyObject *new;
1863
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 new = PyString_FromStringAndSize(NULL, n);
1865 if (new == NULL)
1866 return NULL;
1867 s_new = PyString_AsString(new);
1868 for (i = 0; i < n; i++) {
1869 int c = Py_CHARMASK(*s++);
1870 if (islower(c)) {
1871 *s_new = toupper(c);
1872 } else
1873 *s_new = c;
1874 s_new++;
1875 }
1876 return new;
1877}
1878
1879
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001880PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881"S.title() -> string\n\
1882\n\
1883Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001884characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885
1886static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001887string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888{
1889 char *s = PyString_AS_STRING(self), *s_new;
1890 int i, n = PyString_GET_SIZE(self);
1891 int previous_is_cased = 0;
1892 PyObject *new;
1893
Guido van Rossum4c08d552000-03-10 22:55:18 +00001894 new = PyString_FromStringAndSize(NULL, n);
1895 if (new == NULL)
1896 return NULL;
1897 s_new = PyString_AsString(new);
1898 for (i = 0; i < n; i++) {
1899 int c = Py_CHARMASK(*s++);
1900 if (islower(c)) {
1901 if (!previous_is_cased)
1902 c = toupper(c);
1903 previous_is_cased = 1;
1904 } else if (isupper(c)) {
1905 if (previous_is_cased)
1906 c = tolower(c);
1907 previous_is_cased = 1;
1908 } else
1909 previous_is_cased = 0;
1910 *s_new++ = c;
1911 }
1912 return new;
1913}
1914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001915PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916"S.capitalize() -> string\n\
1917\n\
1918Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001919capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920
1921static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001922string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923{
1924 char *s = PyString_AS_STRING(self), *s_new;
1925 int i, n = PyString_GET_SIZE(self);
1926 PyObject *new;
1927
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928 new = PyString_FromStringAndSize(NULL, n);
1929 if (new == NULL)
1930 return NULL;
1931 s_new = PyString_AsString(new);
1932 if (0 < n) {
1933 int c = Py_CHARMASK(*s++);
1934 if (islower(c))
1935 *s_new = toupper(c);
1936 else
1937 *s_new = c;
1938 s_new++;
1939 }
1940 for (i = 1; i < n; i++) {
1941 int c = Py_CHARMASK(*s++);
1942 if (isupper(c))
1943 *s_new = tolower(c);
1944 else
1945 *s_new = c;
1946 s_new++;
1947 }
1948 return new;
1949}
1950
1951
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001952PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953"S.count(sub[, start[, end]]) -> int\n\
1954\n\
1955Return the number of occurrences of substring sub in string\n\
1956S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001957interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
1959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001960string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963 int len = PyString_GET_SIZE(self), n;
1964 int i = 0, last = INT_MAX;
1965 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967
Guido van Rossumc6821402000-05-08 14:08:05 +00001968 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1969 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001971
Guido van Rossum4c08d552000-03-10 22:55:18 +00001972 if (PyString_Check(subobj)) {
1973 sub = PyString_AS_STRING(subobj);
1974 n = PyString_GET_SIZE(subobj);
1975 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001976#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001977 else if (PyUnicode_Check(subobj)) {
1978 int count;
1979 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1980 if (count == -1)
1981 return NULL;
1982 else
1983 return PyInt_FromLong((long) count);
1984 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001985#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001986 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1987 return NULL;
1988
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001989 string_adjust_indices(&i, &last, len);
1990
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991 m = last + 1 - n;
1992 if (n == 0)
1993 return PyInt_FromLong((long) (m-i));
1994
1995 r = 0;
1996 while (i < m) {
1997 if (!memcmp(s+i, sub, n)) {
1998 r++;
1999 i += n;
2000 } else {
2001 i++;
2002 }
2003 }
2004 return PyInt_FromLong((long) r);
2005}
2006
2007
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002008PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009"S.swapcase() -> string\n\
2010\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002012converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013
2014static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002015string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016{
2017 char *s = PyString_AS_STRING(self), *s_new;
2018 int i, n = PyString_GET_SIZE(self);
2019 PyObject *new;
2020
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 new = PyString_FromStringAndSize(NULL, n);
2022 if (new == NULL)
2023 return NULL;
2024 s_new = PyString_AsString(new);
2025 for (i = 0; i < n; i++) {
2026 int c = Py_CHARMASK(*s++);
2027 if (islower(c)) {
2028 *s_new = toupper(c);
2029 }
2030 else if (isupper(c)) {
2031 *s_new = tolower(c);
2032 }
2033 else
2034 *s_new = c;
2035 s_new++;
2036 }
2037 return new;
2038}
2039
2040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002041PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042"S.translate(table [,deletechars]) -> string\n\
2043\n\
2044Return a copy of the string S, where all characters occurring\n\
2045in the optional argument deletechars are removed, and the\n\
2046remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002047translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048
2049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002050string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 register char *input, *output;
2053 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054 register int i, c, changed = 0;
2055 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057 int inlen, tablen, dellen = 0;
2058 PyObject *result;
2059 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002062 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065
2066 if (PyString_Check(tableobj)) {
2067 table1 = PyString_AS_STRING(tableobj);
2068 tablen = PyString_GET_SIZE(tableobj);
2069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002070#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002072 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002073 parameter; instead a mapping to None will cause characters
2074 to be deleted. */
2075 if (delobj != NULL) {
2076 PyErr_SetString(PyExc_TypeError,
2077 "deletions are implemented differently for unicode");
2078 return NULL;
2079 }
2080 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2081 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002082#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085
Martin v. Löwis00b61272002-12-12 20:03:19 +00002086 if (tablen != 256) {
2087 PyErr_SetString(PyExc_ValueError,
2088 "translation table must be 256 characters long");
2089 return NULL;
2090 }
2091
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 if (delobj != NULL) {
2093 if (PyString_Check(delobj)) {
2094 del_table = PyString_AS_STRING(delobj);
2095 dellen = PyString_GET_SIZE(delobj);
2096 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002097#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 else if (PyUnicode_Check(delobj)) {
2099 PyErr_SetString(PyExc_TypeError,
2100 "deletions are implemented differently for unicode");
2101 return NULL;
2102 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002103#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2105 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106 }
2107 else {
2108 del_table = NULL;
2109 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 }
2111
2112 table = table1;
2113 inlen = PyString_Size(input_obj);
2114 result = PyString_FromStringAndSize((char *)NULL, inlen);
2115 if (result == NULL)
2116 return NULL;
2117 output_start = output = PyString_AsString(result);
2118 input = PyString_AsString(input_obj);
2119
2120 if (dellen == 0) {
2121 /* If no deletions are required, use faster code */
2122 for (i = inlen; --i >= 0; ) {
2123 c = Py_CHARMASK(*input++);
2124 if (Py_CHARMASK((*output++ = table[c])) != c)
2125 changed = 1;
2126 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002127 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 return result;
2129 Py_DECREF(result);
2130 Py_INCREF(input_obj);
2131 return input_obj;
2132 }
2133
2134 for (i = 0; i < 256; i++)
2135 trans_table[i] = Py_CHARMASK(table[i]);
2136
2137 for (i = 0; i < dellen; i++)
2138 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2139
2140 for (i = inlen; --i >= 0; ) {
2141 c = Py_CHARMASK(*input++);
2142 if (trans_table[c] != -1)
2143 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2144 continue;
2145 changed = 1;
2146 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002147 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 Py_DECREF(result);
2149 Py_INCREF(input_obj);
2150 return input_obj;
2151 }
2152 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002153 if (inlen > 0)
2154 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155 return result;
2156}
2157
2158
2159/* What follows is used for implementing replace(). Perry Stoll. */
2160
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the len bytes at MEM for the first occurrence of the pat_len
  bytes at PAT and returns its index into MEM, or -1 when there is none.
  A pattern longer than MEM can never match, so -1 is returned then too.
  PAT[0] is inspected before the full comparison.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest index at which the whole pattern still fits. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
2186
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from the
  left with mymemfind() and resuming right after each match.
   e.g. mem=1111  and pat==11 gives 2,
        mem=11111 and pat==11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;
	int where;
	int consumed;

	for (hits = 0; len >= 0; hits++) {
		where = mymemfind(mem, len, pat, pat_len);
		if (where == -1)
			break;
		/* Drop everything up to and including this match. */
		consumed = where + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
2210
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.  An empty PAT matches before every byte of STR
   and after the last one.

   If STR is empty, if PAT and SUB are both empty, if length of PAT is
   greater than length of STR, or if there are no occurrences of PAT in
   STR (or COUNT is 0), then the original string is returned.  Otherwise,
   a new string is allocated here with PyMem_MALLOC and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;
	int delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Each replacement changes the length by delta bytes.  Only growth
	   can overflow: when delta is negative the matches are
	   non-overlapping pieces of STR, so the result cannot drop below
	   zero.  Refuse results that do not fit in an int instead of
	   allocating a wrapped-around, too small buffer. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;

	new_len = len + nfound*delta;
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* Empty pattern: emit SUB, then one byte of STR,
			   and repeat; after the last SUB the rest of STR
			   is copied in one go. */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2308
2309
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002310PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311"S.replace (old, new[, maxsplit]) -> string\n\
2312\n\
2313Return a copy of string S with all occurrences of substring\n\
2314old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002315given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
2317static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002318string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 const char *str = PyString_AS_STRING(self), *sub, *repl;
2321 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002322 const int len = PyString_GET_SIZE(self);
2323 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 if (!PyArg_ParseTuple(args, "OO|i:replace",
2329 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331
2332 if (PyString_Check(subobj)) {
2333 sub = PyString_AS_STRING(subobj);
2334 sub_len = PyString_GET_SIZE(subobj);
2335 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002336#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002338 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002340#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2342 return NULL;
2343
2344 if (PyString_Check(replobj)) {
2345 repl = PyString_AS_STRING(replobj);
2346 repl_len = PyString_GET_SIZE(replobj);
2347 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002348#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002350 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002352#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2354 return NULL;
2355
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 if (new_s == NULL) {
2358 PyErr_NoMemory();
2359 return NULL;
2360 }
2361 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002362 if (PyString_CheckExact(self)) {
2363 /* we're returning another reference to self */
2364 new = (PyObject*)self;
2365 Py_INCREF(new);
2366 }
2367 else {
2368 new = PyString_FromStringAndSize(str, len);
2369 if (new == NULL)
2370 return NULL;
2371 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 }
2373 else {
2374 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002375 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376 }
2377 return new;
2378}
2379
2380
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002381PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002382"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002384Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002386comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002387
2388static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002389string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394 int plen;
2395 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002396 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398
Guido van Rossumc6821402000-05-08 14:08:05 +00002399 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2400 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401 return NULL;
2402 if (PyString_Check(subobj)) {
2403 prefix = PyString_AS_STRING(subobj);
2404 plen = PyString_GET_SIZE(subobj);
2405 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002406#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002407 else if (PyUnicode_Check(subobj)) {
2408 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002409 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002410 subobj, start, end, -1);
2411 if (rc == -1)
2412 return NULL;
2413 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002414 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002415 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002416#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 return NULL;
2419
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002420 string_adjust_indices(&start, &end, len);
2421
2422 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002423 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002425 if (end-start >= plen)
2426 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2427 else
2428 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429}
2430
2431
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002432PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002433"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002435Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002437comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438
2439static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002440string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 const char* suffix;
2445 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002447 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449
Guido van Rossumc6821402000-05-08 14:08:05 +00002450 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2451 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452 return NULL;
2453 if (PyString_Check(subobj)) {
2454 suffix = PyString_AS_STRING(subobj);
2455 slen = PyString_GET_SIZE(subobj);
2456 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002457#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002458 else if (PyUnicode_Check(subobj)) {
2459 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002460 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002461 subobj, start, end, +1);
2462 if (rc == -1)
2463 return NULL;
2464 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002465 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002467#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469 return NULL;
2470
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002471 string_adjust_indices(&start, &end, len);
2472
2473 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002474 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002476 if (end-slen > start)
2477 start = end - slen;
2478 if (end-start >= slen)
2479 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2480 else
2481 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482}
2483
2484
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002485PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002486"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002487\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002488Encodes S using the codec registered for encoding. encoding defaults\n\
2489to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002490handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002491a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2492'xmlcharrefreplace' as well as any other name registered with\n\
2493codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002494
2495static PyObject *
2496string_encode(PyStringObject *self, PyObject *args)
2497{
2498 char *encoding = NULL;
2499 char *errors = NULL;
2500 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2501 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002502 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2503}
2504
2505
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002506PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002507"S.decode([encoding[,errors]]) -> object\n\
2508\n\
2509Decodes S using the codec registered for encoding. encoding defaults\n\
2510to the default encoding. errors may be given to set a different error\n\
2511handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002512a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2513as well as any other name registerd with codecs.register_error that is\n\
2514able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002515
2516static PyObject *
2517string_decode(PyStringObject *self, PyObject *args)
2518{
2519 char *encoding = NULL;
2520 char *errors = NULL;
2521 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2522 return NULL;
2523 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002524}
2525
2526
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002527PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528"S.expandtabs([tabsize]) -> string\n\
2529\n\
2530Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002531If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532
2533static PyObject*
2534string_expandtabs(PyStringObject *self, PyObject *args)
2535{
2536 const char *e, *p;
2537 char *q;
2538 int i, j;
2539 PyObject *u;
2540 int tabsize = 8;
2541
2542 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2543 return NULL;
2544
Thomas Wouters7e474022000-07-16 12:04:32 +00002545 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546 i = j = 0;
2547 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2548 for (p = PyString_AS_STRING(self); p < e; p++)
2549 if (*p == '\t') {
2550 if (tabsize > 0)
2551 j += tabsize - (j % tabsize);
2552 }
2553 else {
2554 j++;
2555 if (*p == '\n' || *p == '\r') {
2556 i += j;
2557 j = 0;
2558 }
2559 }
2560
2561 /* Second pass: create output string and fill it */
2562 u = PyString_FromStringAndSize(NULL, i + j);
2563 if (!u)
2564 return NULL;
2565
2566 j = 0;
2567 q = PyString_AS_STRING(u);
2568
2569 for (p = PyString_AS_STRING(self); p < e; p++)
2570 if (*p == '\t') {
2571 if (tabsize > 0) {
2572 i = tabsize - (j % tabsize);
2573 j += i;
2574 while (i--)
2575 *q++ = ' ';
2576 }
2577 }
2578 else {
2579 j++;
2580 *q++ = *p;
2581 if (*p == '\n' || *p == '\r')
2582 j = 0;
2583 }
2584
2585 return u;
2586}
2587
Tim Peters8fa5dd02001-09-12 02:18:30 +00002588static PyObject *
2589pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590{
2591 PyObject *u;
2592
2593 if (left < 0)
2594 left = 0;
2595 if (right < 0)
2596 right = 0;
2597
Tim Peters8fa5dd02001-09-12 02:18:30 +00002598 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 Py_INCREF(self);
2600 return (PyObject *)self;
2601 }
2602
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002603 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 left + PyString_GET_SIZE(self) + right);
2605 if (u) {
2606 if (left)
2607 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002608 memcpy(PyString_AS_STRING(u) + left,
2609 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610 PyString_GET_SIZE(self));
2611 if (right)
2612 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2613 fill, right);
2614 }
2615
2616 return u;
2617}
2618
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002619PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002620"S.ljust(width) -> string\n"
2621"\n"
2622"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002623"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624
2625static PyObject *
2626string_ljust(PyStringObject *self, PyObject *args)
2627{
2628 int width;
2629 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2630 return NULL;
2631
Tim Peters8fa5dd02001-09-12 02:18:30 +00002632 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 Py_INCREF(self);
2634 return (PyObject*) self;
2635 }
2636
2637 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2638}
2639
2640
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002641PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002642"S.rjust(width) -> string\n"
2643"\n"
2644"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002645"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646
2647static PyObject *
2648string_rjust(PyStringObject *self, PyObject *args)
2649{
2650 int width;
2651 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2652 return NULL;
2653
Tim Peters8fa5dd02001-09-12 02:18:30 +00002654 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655 Py_INCREF(self);
2656 return (PyObject*) self;
2657 }
2658
2659 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2660}
2661
2662
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002663PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002664"S.center(width) -> string\n"
2665"\n"
2666"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002667"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002668
2669static PyObject *
2670string_center(PyStringObject *self, PyObject *args)
2671{
2672 int marg, left;
2673 int width;
2674
2675 if (!PyArg_ParseTuple(args, "i:center", &width))
2676 return NULL;
2677
Tim Peters8fa5dd02001-09-12 02:18:30 +00002678 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002679 Py_INCREF(self);
2680 return (PyObject*) self;
2681 }
2682
2683 marg = width - PyString_GET_SIZE(self);
2684 left = marg / 2 + (marg & width & 1);
2685
2686 return pad(self, left, marg - left, ' ');
2687}
2688
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002689PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002690"S.zfill(width) -> string\n"
2691"\n"
2692"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002693"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002694
2695static PyObject *
2696string_zfill(PyStringObject *self, PyObject *args)
2697{
2698 int fill;
2699 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002700 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002701
2702 int width;
2703 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2704 return NULL;
2705
2706 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002707 if (PyString_CheckExact(self)) {
2708 Py_INCREF(self);
2709 return (PyObject*) self;
2710 }
2711 else
2712 return PyString_FromStringAndSize(
2713 PyString_AS_STRING(self),
2714 PyString_GET_SIZE(self)
2715 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002716 }
2717
2718 fill = width - PyString_GET_SIZE(self);
2719
2720 s = pad(self, fill, 0, '0');
2721
2722 if (s == NULL)
2723 return NULL;
2724
2725 p = PyString_AS_STRING(s);
2726 if (p[fill] == '+' || p[fill] == '-') {
2727 /* move sign to beginning of string */
2728 p[0] = p[fill];
2729 p[fill] = '0';
2730 }
2731
2732 return (PyObject*) s;
2733}
2734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002736"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002737"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002738"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002739"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002740
2741static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002742string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002743{
Fred Drakeba096332000-07-09 07:04:36 +00002744 register const unsigned char *p
2745 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002746 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002747
Guido van Rossum4c08d552000-03-10 22:55:18 +00002748 /* Shortcut for single character strings */
2749 if (PyString_GET_SIZE(self) == 1 &&
2750 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002751 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002752
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002753 /* Special case for empty strings */
2754 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002755 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002756
Guido van Rossum4c08d552000-03-10 22:55:18 +00002757 e = p + PyString_GET_SIZE(self);
2758 for (; p < e; p++) {
2759 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002760 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002761 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002762 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002763}
2764
2765
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002766PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002767"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002768\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002769Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002770and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002771
2772static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002773string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002774{
Fred Drakeba096332000-07-09 07:04:36 +00002775 register const unsigned char *p
2776 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002777 register const unsigned char *e;
2778
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002779 /* Shortcut for single character strings */
2780 if (PyString_GET_SIZE(self) == 1 &&
2781 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002782 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002783
2784 /* Special case for empty strings */
2785 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002786 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002787
2788 e = p + PyString_GET_SIZE(self);
2789 for (; p < e; p++) {
2790 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002791 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002792 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002793 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002794}
2795
2796
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002797PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002798"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002799\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002800Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002801and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002802
2803static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002804string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002805{
Fred Drakeba096332000-07-09 07:04:36 +00002806 register const unsigned char *p
2807 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002808 register const unsigned char *e;
2809
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002810 /* Shortcut for single character strings */
2811 if (PyString_GET_SIZE(self) == 1 &&
2812 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002813 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002814
2815 /* Special case for empty strings */
2816 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002817 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002818
2819 e = p + PyString_GET_SIZE(self);
2820 for (; p < e; p++) {
2821 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002822 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002823 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002824 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002825}
2826
2827
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002828PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002829"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002830\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002831Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002832False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002833
2834static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002835string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836{
Fred Drakeba096332000-07-09 07:04:36 +00002837 register const unsigned char *p
2838 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002839 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841 /* Shortcut for single character strings */
2842 if (PyString_GET_SIZE(self) == 1 &&
2843 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002844 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002846 /* Special case for empty strings */
2847 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002848 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002849
Guido van Rossum4c08d552000-03-10 22:55:18 +00002850 e = p + PyString_GET_SIZE(self);
2851 for (; p < e; p++) {
2852 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002853 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002854 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002855 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002856}
2857
2858
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002859PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002860"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002862Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002863at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002864
2865static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002866string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867{
Fred Drakeba096332000-07-09 07:04:36 +00002868 register const unsigned char *p
2869 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002870 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871 int cased;
2872
Guido van Rossum4c08d552000-03-10 22:55:18 +00002873 /* Shortcut for single character strings */
2874 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002875 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002877 /* Special case for empty strings */
2878 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002879 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002880
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881 e = p + PyString_GET_SIZE(self);
2882 cased = 0;
2883 for (; p < e; p++) {
2884 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002885 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886 else if (!cased && islower(*p))
2887 cased = 1;
2888 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002889 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890}
2891
2892
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002893PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002894"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002895\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002896Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002897at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002898
2899static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002900string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901{
Fred Drakeba096332000-07-09 07:04:36 +00002902 register const unsigned char *p
2903 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002904 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002905 int cased;
2906
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907 /* Shortcut for single character strings */
2908 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002909 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002910
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002911 /* Special case for empty strings */
2912 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002913 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002914
Guido van Rossum4c08d552000-03-10 22:55:18 +00002915 e = p + PyString_GET_SIZE(self);
2916 cased = 0;
2917 for (; p < e; p++) {
2918 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002919 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920 else if (!cased && isupper(*p))
2921 cased = 1;
2922 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002923 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002924}
2925
2926
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002927PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002928"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002930Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002931may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002932ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002933
2934static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002935string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002936{
Fred Drakeba096332000-07-09 07:04:36 +00002937 register const unsigned char *p
2938 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002939 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002940 int cased, previous_is_cased;
2941
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942 /* Shortcut for single character strings */
2943 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002944 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002945
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002946 /* Special case for empty strings */
2947 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002948 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002949
Guido van Rossum4c08d552000-03-10 22:55:18 +00002950 e = p + PyString_GET_SIZE(self);
2951 cased = 0;
2952 previous_is_cased = 0;
2953 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002954 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002955
2956 if (isupper(ch)) {
2957 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002958 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002959 previous_is_cased = 1;
2960 cased = 1;
2961 }
2962 else if (islower(ch)) {
2963 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002964 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002965 previous_is_cased = 1;
2966 cased = 1;
2967 }
2968 else
2969 previous_is_cased = 0;
2970 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002971 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002972}
2973
2974
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002975PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002976"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002977\n\
2978Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002979Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002980is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002981
2982#define SPLIT_APPEND(data, left, right) \
2983 str = PyString_FromStringAndSize(data + left, right - left); \
2984 if (!str) \
2985 goto onError; \
2986 if (PyList_Append(list, str)) { \
2987 Py_DECREF(str); \
2988 goto onError; \
2989 } \
2990 else \
2991 Py_DECREF(str);
2992
2993static PyObject*
2994string_splitlines(PyStringObject *self, PyObject *args)
2995{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002996 register int i;
2997 register int j;
2998 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002999 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003000 PyObject *list;
3001 PyObject *str;
3002 char *data;
3003
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003004 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003005 return NULL;
3006
3007 data = PyString_AS_STRING(self);
3008 len = PyString_GET_SIZE(self);
3009
Guido van Rossum4c08d552000-03-10 22:55:18 +00003010 list = PyList_New(0);
3011 if (!list)
3012 goto onError;
3013
3014 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003015 int eol;
3016
Guido van Rossum4c08d552000-03-10 22:55:18 +00003017 /* Find a line and append it */
3018 while (i < len && data[i] != '\n' && data[i] != '\r')
3019 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003020
3021 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003022 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003023 if (i < len) {
3024 if (data[i] == '\r' && i + 1 < len &&
3025 data[i+1] == '\n')
3026 i += 2;
3027 else
3028 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003029 if (keepends)
3030 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003031 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003032 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033 j = i;
3034 }
3035 if (j < len) {
3036 SPLIT_APPEND(data, j, len);
3037 }
3038
3039 return list;
3040
3041 onError:
3042 Py_DECREF(list);
3043 return NULL;
3044}
3045
3046#undef SPLIT_APPEND
3047
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003049static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003050string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003051 /* Counterparts of the obsolete stropmodule functions; except
3052 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003053 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3054 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3055 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3056 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003057 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3058 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3059 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3060 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3061 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3062 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3063 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003064 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3065 capitalize__doc__},
3066 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3067 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3068 endswith__doc__},
3069 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3070 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3071 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3072 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3073 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3074 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3075 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3076 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3077 startswith__doc__},
3078 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3079 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3080 swapcase__doc__},
3081 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3082 translate__doc__},
3083 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3084 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3085 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3086 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3087 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3088 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3089 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3090 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3091 expandtabs__doc__},
3092 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3093 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003094 {NULL, NULL} /* sentinel */
3095};
3096
Jeremy Hylton938ace62002-07-17 16:30:39 +00003097static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003098str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3099
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003100static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003101string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003102{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003103 PyObject *x = NULL;
3104 static char *kwlist[] = {"object", 0};
3105
Guido van Rossumae960af2001-08-30 03:11:59 +00003106 if (type != &PyString_Type)
3107 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003108 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3109 return NULL;
3110 if (x == NULL)
3111 return PyString_FromString("");
3112 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003113}
3114
Guido van Rossumae960af2001-08-30 03:11:59 +00003115static PyObject *
3116str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3117{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003118 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003119 int n;
3120
3121 assert(PyType_IsSubtype(type, &PyString_Type));
3122 tmp = string_new(&PyString_Type, args, kwds);
3123 if (tmp == NULL)
3124 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003125 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003126 n = PyString_GET_SIZE(tmp);
3127 pnew = type->tp_alloc(type, n);
3128 if (pnew != NULL) {
3129 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003130 ((PyStringObject *)pnew)->ob_shash =
3131 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003132 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003133 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003134 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003135 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003136}
3137
Guido van Rossumcacfc072002-05-24 19:01:59 +00003138static PyObject *
3139basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3140{
3141 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003142 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003143 return NULL;
3144}
3145
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003146static PyObject *
3147string_mod(PyObject *v, PyObject *w)
3148{
3149 if (!PyString_Check(v)) {
3150 Py_INCREF(Py_NotImplemented);
3151 return Py_NotImplemented;
3152 }
3153 return PyString_Format(v, w);
3154}
3155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003156PyDoc_STRVAR(basestring_doc,
3157"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003158
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003159static PyNumberMethods string_as_number = {
3160 0, /*nb_add*/
3161 0, /*nb_subtract*/
3162 0, /*nb_multiply*/
3163 0, /*nb_divide*/
3164 string_mod, /*nb_remainder*/
3165};
3166
3167
Guido van Rossumcacfc072002-05-24 19:01:59 +00003168PyTypeObject PyBaseString_Type = {
3169 PyObject_HEAD_INIT(&PyType_Type)
3170 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003171 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003172 0,
3173 0,
3174 0, /* tp_dealloc */
3175 0, /* tp_print */
3176 0, /* tp_getattr */
3177 0, /* tp_setattr */
3178 0, /* tp_compare */
3179 0, /* tp_repr */
3180 0, /* tp_as_number */
3181 0, /* tp_as_sequence */
3182 0, /* tp_as_mapping */
3183 0, /* tp_hash */
3184 0, /* tp_call */
3185 0, /* tp_str */
3186 0, /* tp_getattro */
3187 0, /* tp_setattro */
3188 0, /* tp_as_buffer */
3189 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3190 basestring_doc, /* tp_doc */
3191 0, /* tp_traverse */
3192 0, /* tp_clear */
3193 0, /* tp_richcompare */
3194 0, /* tp_weaklistoffset */
3195 0, /* tp_iter */
3196 0, /* tp_iternext */
3197 0, /* tp_methods */
3198 0, /* tp_members */
3199 0, /* tp_getset */
3200 &PyBaseObject_Type, /* tp_base */
3201 0, /* tp_dict */
3202 0, /* tp_descr_get */
3203 0, /* tp_descr_set */
3204 0, /* tp_dictoffset */
3205 0, /* tp_init */
3206 0, /* tp_alloc */
3207 basestring_new, /* tp_new */
3208 0, /* tp_free */
3209};
3210
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003211PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003212"str(object) -> string\n\
3213\n\
3214Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003215If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003216
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003217PyTypeObject PyString_Type = {
3218 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003219 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003220 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003221 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003222 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003223 (destructor)string_dealloc, /* tp_dealloc */
3224 (printfunc)string_print, /* tp_print */
3225 0, /* tp_getattr */
3226 0, /* tp_setattr */
3227 0, /* tp_compare */
3228 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003229 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003230 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003231 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003232 (hashfunc)string_hash, /* tp_hash */
3233 0, /* tp_call */
3234 (reprfunc)string_str, /* tp_str */
3235 PyObject_GenericGetAttr, /* tp_getattro */
3236 0, /* tp_setattro */
3237 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003238 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3239 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003240 string_doc, /* tp_doc */
3241 0, /* tp_traverse */
3242 0, /* tp_clear */
3243 (richcmpfunc)string_richcompare, /* tp_richcompare */
3244 0, /* tp_weaklistoffset */
3245 0, /* tp_iter */
3246 0, /* tp_iternext */
3247 string_methods, /* tp_methods */
3248 0, /* tp_members */
3249 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003250 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003251 0, /* tp_dict */
3252 0, /* tp_descr_get */
3253 0, /* tp_descr_set */
3254 0, /* tp_dictoffset */
3255 0, /* tp_init */
3256 0, /* tp_alloc */
3257 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003258 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003259};
3260
3261void
Fred Drakeba096332000-07-09 07:04:36 +00003262PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003263{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003264 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003265 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003266 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003267 if (w == NULL || !PyString_Check(*pv)) {
3268 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003269 *pv = NULL;
3270 return;
3271 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003272 v = string_concat((PyStringObject *) *pv, w);
3273 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003274 *pv = v;
3275}
3276
Guido van Rossum013142a1994-08-30 08:19:36 +00003277void
Fred Drakeba096332000-07-09 07:04:36 +00003278PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003279{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003280 PyString_Concat(pv, w);
3281 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003282}
3283
3284
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003285/* The following function breaks the notion that strings are immutable:
3286 it changes the size of a string. We get away with this only if there
3287 is only one module referencing the object. You can also think of it
3288 as creating a new string object and destroying the old one, only
3289 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003290 already be known to some other part of the code...
3291 Note that if there's not enough memory to resize the string, the original
3292 string object at *pv is deallocated, *pv is set to NULL, an "out of
3293 memory" exception is set, and -1 is returned. Else (on success) 0 is
3294 returned, and the value in *pv may or may not be the same as on input.
3295 As always, an extra byte is allocated for a trailing \0 byte (newsize
3296 does *not* include that), and a trailing \0 byte is stored.
3297*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003298
3299int
Fred Drakeba096332000-07-09 07:04:36 +00003300_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003301{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003302 register PyObject *v;
3303 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003304 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003305 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003306 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003307 Py_DECREF(v);
3308 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003309 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003310 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003311 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003312 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003313 _Py_ForgetReference(v);
3314 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003315 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003316 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003317 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003318 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003319 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003320 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003321 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003322 _Py_NewReference(*pv);
3323 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003324 sv->ob_size = newsize;
3325 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003326 return 0;
3327}
Guido van Rossume5372401993-03-16 12:15:04 +00003328
3329/* Helpers for formatstring */
3330
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003331static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003332getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003333{
3334 int argidx = *p_argidx;
3335 if (argidx < arglen) {
3336 (*p_argidx)++;
3337 if (arglen < 0)
3338 return args;
3339 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003340 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003341 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003342 PyErr_SetString(PyExc_TypeError,
3343 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003344 return NULL;
3345}
3346
/* Format flag bits, one per flag character of a format specifier.  They
   are OR-ed together into the `flags` argument of the format helpers. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
3359
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003360static int
Fred Drakeba096332000-07-09 07:04:36 +00003361formatfloat(char *buf, size_t buflen, int flags,
3362 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003363{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003364 /* fmt = '%#.' + `prec` + `type`
3365 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003366 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003367 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003368 x = PyFloat_AsDouble(v);
3369 if (x == -1.0 && PyErr_Occurred()) {
3370 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003371 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003372 }
Guido van Rossume5372401993-03-16 12:15:04 +00003373 if (prec < 0)
3374 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003375 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3376 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003377 /* Worst case length calc to ensure no buffer overrun:
3378
3379 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003380 fmt = %#.<prec>g
3381 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003382 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003383 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003384
3385 'f' formats:
3386 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3387 len = 1 + 50 + 1 + prec = 52 + prec
3388
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003389 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003390 always given), therefore increase the length by one.
3391
3392 */
3393 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3394 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003395 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003396 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003397 return -1;
3398 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003399 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3400 (flags&F_ALT) ? "#" : "",
3401 prec, type);
Tim Peters885d4572001-11-28 20:27:42 +00003402 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003403 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003404}
3405
Tim Peters38fd5b62000-09-21 05:43:11 +00003406/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3407 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3408 * Python's regular ints.
3409 * Return value: a new PyString*, or NULL if error.
3410 * . *pbuf is set to point into it,
3411 * *plen set to the # of chars following that.
3412 * Caller must decref it when done using pbuf.
3413 * The string starting at *pbuf is of the form
3414 * "-"? ("0x" | "0X")? digit+
3415 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003416 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003417 * There will be at least prec digits, zero-filled on the left if
3418 * necessary to get that many.
3419 * val object to be converted
3420 * flags bitmask of format flags; only F_ALT is looked at
3421 * prec minimum number of digits; 0-fill on left if needed
3422 * type a character in [duoxX]; u acts the same as d
3423 *
3424 * CAUTION: o, x and X conversions on regular ints can never
3425 * produce a '-' sign, but can for Python's unbounded ints.
3426 */
3427PyObject*
3428_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3429 char **pbuf, int *plen)
3430{
3431 PyObject *result = NULL;
3432 char *buf;
3433 int i;
3434 int sign; /* 1 if '-', else 0 */
3435 int len; /* number of characters */
3436 int numdigits; /* len == numnondigits + numdigits */
3437 int numnondigits = 0;
3438
3439 switch (type) {
3440 case 'd':
3441 case 'u':
3442 result = val->ob_type->tp_str(val);
3443 break;
3444 case 'o':
3445 result = val->ob_type->tp_as_number->nb_oct(val);
3446 break;
3447 case 'x':
3448 case 'X':
3449 numnondigits = 2;
3450 result = val->ob_type->tp_as_number->nb_hex(val);
3451 break;
3452 default:
3453 assert(!"'type' not in [duoxX]");
3454 }
3455 if (!result)
3456 return NULL;
3457
3458 /* To modify the string in-place, there can only be one reference. */
3459 if (result->ob_refcnt != 1) {
3460 PyErr_BadInternalCall();
3461 return NULL;
3462 }
3463 buf = PyString_AsString(result);
3464 len = PyString_Size(result);
3465 if (buf[len-1] == 'L') {
3466 --len;
3467 buf[len] = '\0';
3468 }
3469 sign = buf[0] == '-';
3470 numnondigits += sign;
3471 numdigits = len - numnondigits;
3472 assert(numdigits > 0);
3473
Tim Petersfff53252001-04-12 18:38:48 +00003474 /* Get rid of base marker unless F_ALT */
3475 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003476 /* Need to skip 0x, 0X or 0. */
3477 int skipped = 0;
3478 switch (type) {
3479 case 'o':
3480 assert(buf[sign] == '0');
3481 /* If 0 is only digit, leave it alone. */
3482 if (numdigits > 1) {
3483 skipped = 1;
3484 --numdigits;
3485 }
3486 break;
3487 case 'x':
3488 case 'X':
3489 assert(buf[sign] == '0');
3490 assert(buf[sign + 1] == 'x');
3491 skipped = 2;
3492 numnondigits -= 2;
3493 break;
3494 }
3495 if (skipped) {
3496 buf += skipped;
3497 len -= skipped;
3498 if (sign)
3499 buf[0] = '-';
3500 }
3501 assert(len == numnondigits + numdigits);
3502 assert(numdigits > 0);
3503 }
3504
3505 /* Fill with leading zeroes to meet minimum width. */
3506 if (prec > numdigits) {
3507 PyObject *r1 = PyString_FromStringAndSize(NULL,
3508 numnondigits + prec);
3509 char *b1;
3510 if (!r1) {
3511 Py_DECREF(result);
3512 return NULL;
3513 }
3514 b1 = PyString_AS_STRING(r1);
3515 for (i = 0; i < numnondigits; ++i)
3516 *b1++ = *buf++;
3517 for (i = 0; i < prec - numdigits; i++)
3518 *b1++ = '0';
3519 for (i = 0; i < numdigits; i++)
3520 *b1++ = *buf++;
3521 *b1 = '\0';
3522 Py_DECREF(result);
3523 result = r1;
3524 buf = PyString_AS_STRING(result);
3525 len = numnondigits + prec;
3526 }
3527
3528 /* Fix up case for hex conversions. */
3529 switch (type) {
3530 case 'x':
3531 /* Need to convert all upper case letters to lower case. */
3532 for (i = 0; i < len; i++)
3533 if (buf[i] >= 'A' && buf[i] <= 'F')
3534 buf[i] += 'a'-'A';
3535 break;
3536 case 'X':
3537 /* Need to convert 0x to 0X (and -0x to -0X). */
3538 if (buf[sign + 1] == 'x')
3539 buf[sign + 1] = 'X';
3540 break;
3541 }
3542 *pbuf = buf;
3543 *plen = len;
3544 return result;
3545}
3546
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003547static int
Fred Drakeba096332000-07-09 07:04:36 +00003548formatint(char *buf, size_t buflen, int flags,
3549 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003550{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003551 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003552 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3553 + 1 + 1 = 24 */
3554 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003555 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003556
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003557 x = PyInt_AsLong(v);
3558 if (x == -1 && PyErr_Occurred()) {
3559 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003560 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003561 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003562 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003563 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003564 "%u/%o/%x/%X of negative int will return "
3565 "a signed string in Python 2.4 and up") < 0)
3566 return -1;
3567 }
Guido van Rossume5372401993-03-16 12:15:04 +00003568 if (prec < 0)
3569 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003570
3571 if ((flags & F_ALT) &&
3572 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003573 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003574 * of issues that cause pain:
3575 * - when 0 is being converted, the C standard leaves off
3576 * the '0x' or '0X', which is inconsistent with other
3577 * %#x/%#X conversions and inconsistent with Python's
3578 * hex() function
3579 * - there are platforms that violate the standard and
3580 * convert 0 with the '0x' or '0X'
3581 * (Metrowerks, Compaq Tru64)
3582 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003583 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003584 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003585 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003586 * We can achieve the desired consistency by inserting our
3587 * own '0x' or '0X' prefix, and substituting %x/%X in place
3588 * of %#x/%#X.
3589 *
3590 * Note that this is the same approach as used in
3591 * formatint() in unicodeobject.c
3592 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003593 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003594 type, prec, type);
3595 }
3596 else {
3597 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003598 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003599 prec, type);
3600 }
3601
Tim Peters38fd5b62000-09-21 05:43:11 +00003602 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003603 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3604 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003605 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003606 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003607 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003608 return -1;
3609 }
Tim Peters885d4572001-11-28 20:27:42 +00003610 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003611 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003612}
3613
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003614static int
Fred Drakeba096332000-07-09 07:04:36 +00003615formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003616{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003617 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003618 if (PyString_Check(v)) {
3619 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003620 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003621 }
3622 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003623 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003624 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003625 }
3626 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003627 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003628}
3629
Guido van Rossum013142a1994-08-30 08:19:36 +00003630
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in any expression (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003640
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003641PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003642PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003643{
3644 char *fmt, *res;
3645 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003646 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003647 PyObject *result, *orig_args;
3648#ifdef Py_USING_UNICODE
3649 PyObject *v, *w;
3650#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003651 PyObject *dict = NULL;
3652 if (format == NULL || !PyString_Check(format) || args == NULL) {
3653 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003654 return NULL;
3655 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003656 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003657 fmt = PyString_AS_STRING(format);
3658 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003659 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003660 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003661 if (result == NULL)
3662 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003663 res = PyString_AsString(result);
3664 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003665 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003666 argidx = 0;
3667 }
3668 else {
3669 arglen = -1;
3670 argidx = -2;
3671 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003672 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3673 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003674 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003675 while (--fmtcnt >= 0) {
3676 if (*fmt != '%') {
3677 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003678 rescnt = fmtcnt + 100;
3679 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003680 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003681 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003682 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003683 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003684 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003685 }
3686 *res++ = *fmt++;
3687 }
3688 else {
3689 /* Got a format specifier */
3690 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003691 int width = -1;
3692 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003693 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003694 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003695 PyObject *v = NULL;
3696 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003697 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003698 int sign;
3699 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003700 char formatbuf[FORMATBUFLEN];
3701 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003702#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003703 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003704 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003705#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003706
Guido van Rossumda9c2711996-12-05 21:58:58 +00003707 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003708 if (*fmt == '(') {
3709 char *keystart;
3710 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003711 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003712 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003713
3714 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003715 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003716 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003717 goto error;
3718 }
3719 ++fmt;
3720 --fmtcnt;
3721 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003722 /* Skip over balanced parentheses */
3723 while (pcount > 0 && --fmtcnt >= 0) {
3724 if (*fmt == ')')
3725 --pcount;
3726 else if (*fmt == '(')
3727 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003728 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003729 }
3730 keylen = fmt - keystart - 1;
3731 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003732 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003733 "incomplete format key");
3734 goto error;
3735 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003736 key = PyString_FromStringAndSize(keystart,
3737 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003738 if (key == NULL)
3739 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003740 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003741 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003742 args_owned = 0;
3743 }
3744 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003745 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003746 if (args == NULL) {
3747 goto error;
3748 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003749 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003750 arglen = -1;
3751 argidx = -2;
3752 }
Guido van Rossume5372401993-03-16 12:15:04 +00003753 while (--fmtcnt >= 0) {
3754 switch (c = *fmt++) {
3755 case '-': flags |= F_LJUST; continue;
3756 case '+': flags |= F_SIGN; continue;
3757 case ' ': flags |= F_BLANK; continue;
3758 case '#': flags |= F_ALT; continue;
3759 case '0': flags |= F_ZERO; continue;
3760 }
3761 break;
3762 }
3763 if (c == '*') {
3764 v = getnextarg(args, arglen, &argidx);
3765 if (v == NULL)
3766 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003767 if (!PyInt_Check(v)) {
3768 PyErr_SetString(PyExc_TypeError,
3769 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003770 goto error;
3771 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003772 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003773 if (width < 0) {
3774 flags |= F_LJUST;
3775 width = -width;
3776 }
Guido van Rossume5372401993-03-16 12:15:04 +00003777 if (--fmtcnt >= 0)
3778 c = *fmt++;
3779 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003780 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003781 width = c - '0';
3782 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003783 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003784 if (!isdigit(c))
3785 break;
3786 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003787 PyErr_SetString(
3788 PyExc_ValueError,
3789 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003790 goto error;
3791 }
3792 width = width*10 + (c - '0');
3793 }
3794 }
3795 if (c == '.') {
3796 prec = 0;
3797 if (--fmtcnt >= 0)
3798 c = *fmt++;
3799 if (c == '*') {
3800 v = getnextarg(args, arglen, &argidx);
3801 if (v == NULL)
3802 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003803 if (!PyInt_Check(v)) {
3804 PyErr_SetString(
3805 PyExc_TypeError,
3806 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003807 goto error;
3808 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003809 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003810 if (prec < 0)
3811 prec = 0;
3812 if (--fmtcnt >= 0)
3813 c = *fmt++;
3814 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003815 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003816 prec = c - '0';
3817 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003818 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003819 if (!isdigit(c))
3820 break;
3821 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003822 PyErr_SetString(
3823 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003824 "prec too big");
3825 goto error;
3826 }
3827 prec = prec*10 + (c - '0');
3828 }
3829 }
3830 } /* prec */
3831 if (fmtcnt >= 0) {
3832 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003833 if (--fmtcnt >= 0)
3834 c = *fmt++;
3835 }
3836 }
3837 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003838 PyErr_SetString(PyExc_ValueError,
3839 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003840 goto error;
3841 }
3842 if (c != '%') {
3843 v = getnextarg(args, arglen, &argidx);
3844 if (v == NULL)
3845 goto error;
3846 }
3847 sign = 0;
3848 fill = ' ';
3849 switch (c) {
3850 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003851 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003852 len = 1;
3853 break;
3854 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003855#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003856 if (PyUnicode_Check(v)) {
3857 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003858 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003859 goto unicode;
3860 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003861#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003862 /* Fall through */
3863 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003864 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003865 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003866 else
3867 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003868 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003869 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003870 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00003871 /* XXX Note: this should never happen,
3872 since PyObject_Repr() and
3873 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003874 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00003875 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003876 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003877 goto error;
3878 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003879 pbuf = PyString_AS_STRING(temp);
3880 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003881 if (prec >= 0 && len > prec)
3882 len = prec;
3883 break;
3884 case 'i':
3885 case 'd':
3886 case 'u':
3887 case 'o':
3888 case 'x':
3889 case 'X':
3890 if (c == 'i')
3891 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003892 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003893 temp = _PyString_FormatLong(v, flags,
3894 prec, c, &pbuf, &len);
3895 if (!temp)
3896 goto error;
3897 /* unbounded ints can always produce
3898 a sign character! */
3899 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003900 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003901 else {
3902 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003903 len = formatint(pbuf,
3904 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003905 flags, prec, c, v);
3906 if (len < 0)
3907 goto error;
3908 /* only d conversion is signed */
3909 sign = c == 'd';
3910 }
3911 if (flags & F_ZERO)
3912 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003913 break;
3914 case 'e':
3915 case 'E':
3916 case 'f':
3917 case 'g':
3918 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003919 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003920 len = formatfloat(pbuf, sizeof(formatbuf),
3921 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003922 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003923 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003924 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003925 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003926 fill = '0';
3927 break;
3928 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003929 pbuf = formatbuf;
3930 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003931 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003932 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003933 break;
3934 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003935 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003936 "unsupported format character '%c' (0x%x) "
3937 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003938 c, c,
3939 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003940 goto error;
3941 }
3942 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003943 if (*pbuf == '-' || *pbuf == '+') {
3944 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003945 len--;
3946 }
3947 else if (flags & F_SIGN)
3948 sign = '+';
3949 else if (flags & F_BLANK)
3950 sign = ' ';
3951 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003952 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003953 }
3954 if (width < len)
3955 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003956 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003957 reslen -= rescnt;
3958 rescnt = width + fmtcnt + 100;
3959 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003960 if (reslen < 0) {
3961 Py_DECREF(result);
3962 return PyErr_NoMemory();
3963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003964 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003965 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003966 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003967 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003968 }
3969 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003970 if (fill != ' ')
3971 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003972 rescnt--;
3973 if (width > len)
3974 width--;
3975 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003976 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3977 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003978 assert(pbuf[1] == c);
3979 if (fill != ' ') {
3980 *res++ = *pbuf++;
3981 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003982 }
Tim Petersfff53252001-04-12 18:38:48 +00003983 rescnt -= 2;
3984 width -= 2;
3985 if (width < 0)
3986 width = 0;
3987 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003988 }
3989 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003990 do {
3991 --rescnt;
3992 *res++ = fill;
3993 } while (--width > len);
3994 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003995 if (fill == ' ') {
3996 if (sign)
3997 *res++ = sign;
3998 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003999 (c == 'x' || c == 'X')) {
4000 assert(pbuf[0] == '0');
4001 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004002 *res++ = *pbuf++;
4003 *res++ = *pbuf++;
4004 }
4005 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004006 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004007 res += len;
4008 rescnt -= len;
4009 while (--width >= len) {
4010 --rescnt;
4011 *res++ = ' ';
4012 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004013 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004014 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004015 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004016 goto error;
4017 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004018 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004019 } /* '%' */
4020 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004021 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004022 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004023 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004024 goto error;
4025 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004026 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004027 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004028 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004029 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004030 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004031
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004032#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004033 unicode:
4034 if (args_owned) {
4035 Py_DECREF(args);
4036 args_owned = 0;
4037 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004038 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004039 if (PyTuple_Check(orig_args) && argidx > 0) {
4040 PyObject *v;
4041 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4042 v = PyTuple_New(n);
4043 if (v == NULL)
4044 goto error;
4045 while (--n >= 0) {
4046 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4047 Py_INCREF(w);
4048 PyTuple_SET_ITEM(v, n, w);
4049 }
4050 args = v;
4051 } else {
4052 Py_INCREF(orig_args);
4053 args = orig_args;
4054 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004055 args_owned = 1;
4056 /* Take what we have of the result and let the Unicode formatting
4057 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004058 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004059 if (_PyString_Resize(&result, rescnt))
4060 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004061 fmtcnt = PyString_GET_SIZE(format) - \
4062 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004063 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4064 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004065 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004066 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004067 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004068 if (v == NULL)
4069 goto error;
4070 /* Paste what we have (result) to what the Unicode formatting
4071 function returned (v) and return the result (or error) */
4072 w = PyUnicode_Concat(result, v);
4073 Py_DECREF(result);
4074 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004075 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004076 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004077#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004078
Guido van Rossume5372401993-03-16 12:15:04 +00004079 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004080 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004081 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004082 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004083 }
Guido van Rossume5372401993-03-16 12:15:04 +00004084 return NULL;
4085}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004086
Guido van Rossum2a61e741997-01-18 07:55:05 +00004087void
Fred Drakeba096332000-07-09 07:04:36 +00004088PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004089{
4090 register PyStringObject *s = (PyStringObject *)(*p);
4091 PyObject *t;
4092 if (s == NULL || !PyString_Check(s))
4093 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004094 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004095 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004096 if (interned == NULL) {
4097 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004098 if (interned == NULL) {
4099 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004100 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004101 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004102 }
4103 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4104 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004105 Py_DECREF(*p);
4106 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004107 return;
4108 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004109 /* Ensure that only true string objects appear in the intern dict */
4110 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004111 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4112 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004113 if (t == NULL) {
4114 PyErr_Clear();
4115 return;
Tim Peters111f6092001-09-12 07:54:51 +00004116 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004117 } else {
4118 t = (PyObject*) s;
4119 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004120 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004121
4122 if (PyDict_SetItem(interned, t, t) == 0) {
4123 /* The two references in interned are not counted by
4124 refcnt. The string deallocator will take care of this */
4125 ((PyObject *)t)->ob_refcnt-=2;
4126 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4127 Py_DECREF(*p);
4128 *p = t;
4129 return;
4130 }
4131 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004132 PyErr_Clear();
4133}
4134
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004135void
4136PyString_InternImmortal(PyObject **p)
4137{
4138 PyString_InternInPlace(p);
4139 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4140 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4141 Py_INCREF(*p);
4142 }
4143}
4144
Guido van Rossum2a61e741997-01-18 07:55:05 +00004145
4146PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004147PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004148{
4149 PyObject *s = PyString_FromString(cp);
4150 if (s == NULL)
4151 return NULL;
4152 PyString_InternInPlace(&s);
4153 return s;
4154}
4155
Guido van Rossum8cf04761997-08-02 02:57:45 +00004156void
Fred Drakeba096332000-07-09 07:04:36 +00004157PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004158{
4159 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004160 for (i = 0; i < UCHAR_MAX + 1; i++) {
4161 Py_XDECREF(characters[i]);
4162 characters[i] = NULL;
4163 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004164 Py_XDECREF(nullstring);
4165 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004166}
Barry Warsawa903ad982001-02-23 16:40:48 +00004167
Barry Warsawa903ad982001-02-23 16:40:48 +00004168void _Py_ReleaseInternedStrings(void)
4169{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004170 PyObject *keys;
4171 PyStringObject *s;
4172 int i, n;
4173
4174 if (interned == NULL || !PyDict_Check(interned))
4175 return;
4176 keys = PyDict_Keys(interned);
4177 if (keys == NULL || !PyList_Check(keys)) {
4178 PyErr_Clear();
4179 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004180 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004181
4182 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4183 detector, interned strings are not forcibly deallocated; rather, we
4184 give them their stolen references back, and then clear and DECREF
4185 the interned dict. */
4186
4187 fprintf(stderr, "releasing interned strings\n");
4188 n = PyList_GET_SIZE(keys);
4189 for (i = 0; i < n; i++) {
4190 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4191 switch (s->ob_sstate) {
4192 case SSTATE_NOT_INTERNED:
4193 /* XXX Shouldn't happen */
4194 break;
4195 case SSTATE_INTERNED_IMMORTAL:
4196 s->ob_refcnt += 1;
4197 break;
4198 case SSTATE_INTERNED_MORTAL:
4199 s->ob_refcnt += 2;
4200 break;
4201 default:
4202 Py_FatalError("Inconsistent interned string state.");
4203 }
4204 s->ob_sstate = SSTATE_NOT_INTERNED;
4205 }
4206 Py_DECREF(keys);
4207 PyDict_Clear(interned);
4208 Py_DECREF(interned);
4209 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004210}