blob: acfce8b1047a1d3ba6e0d02c892fde253928475a [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 For PyString_FromStringAndSize(), the parameter the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000544 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545#ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
556
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
562
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
572 }
573#else
574 *p++ = *s++;
575#endif
576 continue;
577 }
578 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
583 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
604 }
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
647#ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "Unicode escapes not legal "
654 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#endif
658 default:
659 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
671}
672
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000673static int
674string_getsize(register PyObject *op)
675{
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
681}
682
683static /*const*/ char *
684string_getbuffer(register PyObject *op)
685{
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
691}
692
Guido van Rossumd7047b31995-01-02 19:07:15 +0000693int
Fred Drakeba096332000-07-09 07:04:36 +0000694PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (!PyString_Check(op))
697 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699}
700
701/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709int
710PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
713{
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
717 }
718
719 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
725 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000726 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000727#endif
728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747/* Methods */
748
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000749static int
Fred Drakeba096332000-07-09 07:04:36 +0000750string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
752 int i;
753 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000754 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000755
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000756 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
766 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000768#ifdef __VMS
769 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
770#else
771 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
772#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775
Thomas Wouters7e474022000-07-16 12:04:32 +0000776 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000777 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000778 if (memchr(op->ob_sval, '\'', op->ob_size) &&
779 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 quote = '"';
781
782 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 for (i = 0; i < op->ob_size; i++) {
784 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000789 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000791 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fprintf(fp, "\\r");
793 else if (c < ' ' || c >= 0x7f)
794 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000795 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000798 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000802PyObject *
803PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000805 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000806 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
807 PyObject *v;
808 if (newsize > INT_MAX) {
809 PyErr_SetString(PyExc_OverflowError,
810 "string is too large to make repr");
811 }
812 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000814 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 }
816 else {
817 register int i;
818 register char c;
819 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 int quote;
821
Thomas Wouters7e474022000-07-16 12:04:32 +0000822 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000824 if (smartquotes &&
825 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000826 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000827 quote = '"';
828
Tim Peters9161c8b2001-12-03 01:55:38 +0000829 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
839 *p++ = '\\', *p++ = 't';
840 else if (c == '\n')
841 *p++ = '\\', *p++ = 'n';
842 else if (c == '\r')
843 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 else if (c < ' ' || c >= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
847 function call). */
848 sprintf(p, "\\x%02x", c & 0xff);
849 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000850 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000851 else
852 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000857 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000858 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000859 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000860 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864string_repr(PyObject *op)
865{
866 return PyString_Repr(op, 1);
867}
868
869static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000870string_str(PyObject *s)
871{
Tim Petersc9933152001-10-16 20:18:24 +0000872 assert(PyString_Check(s));
873 if (PyString_CheckExact(s)) {
874 Py_INCREF(s);
875 return s;
876 }
877 else {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject *t = (PyStringObject *) s;
880 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
881 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000882}
883
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884static int
Fred Drakeba096332000-07-09 07:04:36 +0000885string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 return a->ob_size;
888}
889
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000891string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892{
893 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 register PyStringObject *op;
895 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000896#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000897 if (PyUnicode_Check(bb))
898 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000899#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000900 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000901 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000902 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 return NULL;
904 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000907 if ((a->ob_size == 0 || b->ob_size == 0) &&
908 PyString_CheckExact(a) && PyString_CheckExact(b)) {
909 if (a->ob_size == 0) {
910 Py_INCREF(bb);
911 return bb;
912 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 Py_INCREF(a);
914 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
916 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000917 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000919 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000920 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000922 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000924 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000925 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
926 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
927 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929#undef b
930}
931
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000933string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
935 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000936 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000937 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000939 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 if (n < 0)
941 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000942 /* watch out for overflows: the size can overflow int,
943 * and the # of bytes needed can overflow size_t
944 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000946 if (n && size / n != a->ob_size) {
947 PyErr_SetString(PyExc_OverflowError,
948 "repeated string is too long");
949 return NULL;
950 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000951 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
Tim Peters8f422462000-09-09 06:13:41 +0000955 nbytes = size * sizeof(char);
956 if (nbytes / sizeof(char) != (size_t)size ||
957 nbytes + sizeof(PyStringObject) <= nbytes) {
958 PyErr_SetString(PyExc_OverflowError,
959 "repeated string is too long");
960 return NULL;
961 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000963 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000969 i = 0;
970 if (i < size) {
971 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
972 i = (int) a->ob_size;
973 }
974 while (i < size) {
975 j = (i <= size-i) ? i : size-i;
976 memcpy(op->ob_sval+i, op->ob_sval, j);
977 i += j;
978 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000979 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981}
982
983/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
984
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000986string_slice(register PyStringObject *a, register int i, register int j)
987 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988{
989 if (i < 0)
990 i = 0;
991 if (j < 0)
992 j = 0; /* Avoid signed/unsigned bug in next line */
993 if (j > a->ob_size)
994 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
996 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 Py_INCREF(a);
998 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 }
1000 if (j < i)
1001 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001002 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003}
1004
Guido van Rossum9284a572000-03-07 15:53:43 +00001005static int
Fred Drakeba096332000-07-09 07:04:36 +00001006string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001007{
Barry Warsaw817918c2002-08-06 16:58:21 +00001008 const char *lhs, *rhs, *end;
1009 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001010
1011 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001012#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001013 if (PyUnicode_Check(el))
1014 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001015#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001016 if (!PyString_Check(el)) {
1017 PyErr_SetString(PyExc_TypeError,
1018 "'in <string>' requires string as left operand");
1019 return -1;
1020 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001021 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001022 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001023 rhs = PyString_AS_STRING(el);
1024 lhs = PyString_AS_STRING(a);
1025
1026 /* optimize for a single character */
1027 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001028 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001029
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001030 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001031 while (lhs <= end) {
1032 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001033 return 1;
1034 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001035
Guido van Rossum9284a572000-03-07 15:53:43 +00001036 return 0;
1037}
1038
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001040string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001041{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001043 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001044 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001045 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046 return NULL;
1047 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001048 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001049 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001050 if (v == NULL)
1051 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001052 else {
1053#ifdef COUNT_ALLOCS
1054 one_strings++;
1055#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001056 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001057 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001058 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059}
1060
Martin v. Löwiscd353062001-05-24 16:56:35 +00001061static PyObject*
1062string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001064 int c;
1065 int len_a, len_b;
1066 int min_len;
1067 PyObject *result;
1068
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001069 /* Make sure both arguments are strings. */
1070 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071 result = Py_NotImplemented;
1072 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001073 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 if (a == b) {
1075 switch (op) {
1076 case Py_EQ:case Py_LE:case Py_GE:
1077 result = Py_True;
1078 goto out;
1079 case Py_NE:case Py_LT:case Py_GT:
1080 result = Py_False;
1081 goto out;
1082 }
1083 }
1084 if (op == Py_EQ) {
1085 /* Supporting Py_NE here as well does not save
1086 much time, since Py_NE is rarely used. */
1087 if (a->ob_size == b->ob_size
1088 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001089 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001090 a->ob_size) == 0)) {
1091 result = Py_True;
1092 } else {
1093 result = Py_False;
1094 }
1095 goto out;
1096 }
1097 len_a = a->ob_size; len_b = b->ob_size;
1098 min_len = (len_a < len_b) ? len_a : len_b;
1099 if (min_len > 0) {
1100 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1101 if (c==0)
1102 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1103 }else
1104 c = 0;
1105 if (c == 0)
1106 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1107 switch (op) {
1108 case Py_LT: c = c < 0; break;
1109 case Py_LE: c = c <= 0; break;
1110 case Py_EQ: assert(0); break; /* unreachable */
1111 case Py_NE: c = c != 0; break;
1112 case Py_GT: c = c > 0; break;
1113 case Py_GE: c = c >= 0; break;
1114 default:
1115 result = Py_NotImplemented;
1116 goto out;
1117 }
1118 result = c ? Py_True : Py_False;
1119 out:
1120 Py_INCREF(result);
1121 return result;
1122}
1123
1124int
1125_PyString_Eq(PyObject *o1, PyObject *o2)
1126{
1127 PyStringObject *a, *b;
1128 a = (PyStringObject*)o1;
1129 b = (PyStringObject*)o2;
1130 return a->ob_size == b->ob_size
1131 && *a->ob_sval == *b->ob_sval
1132 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001133}
1134
Guido van Rossum9bfef441993-03-29 10:43:31 +00001135static long
Fred Drakeba096332000-07-09 07:04:36 +00001136string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001137{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001138 register int len;
1139 register unsigned char *p;
1140 register long x;
1141
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 if (a->ob_shash != -1)
1143 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001144 len = a->ob_size;
1145 p = (unsigned char *) a->ob_sval;
1146 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001147 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001148 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001149 x ^= a->ob_size;
1150 if (x == -1)
1151 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001153 return x;
1154}
1155
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001156static PyObject*
1157string_subscript(PyStringObject* self, PyObject* item)
1158{
1159 if (PyInt_Check(item)) {
1160 long i = PyInt_AS_LONG(item);
1161 if (i < 0)
1162 i += PyString_GET_SIZE(self);
1163 return string_item(self,i);
1164 }
1165 else if (PyLong_Check(item)) {
1166 long i = PyLong_AsLong(item);
1167 if (i == -1 && PyErr_Occurred())
1168 return NULL;
1169 if (i < 0)
1170 i += PyString_GET_SIZE(self);
1171 return string_item(self,i);
1172 }
1173 else if (PySlice_Check(item)) {
1174 int start, stop, step, slicelength, cur, i;
1175 char* source_buf;
1176 char* result_buf;
1177 PyObject* result;
1178
1179 if (PySlice_GetIndicesEx((PySliceObject*)item,
1180 PyString_GET_SIZE(self),
1181 &start, &stop, &step, &slicelength) < 0) {
1182 return NULL;
1183 }
1184
1185 if (slicelength <= 0) {
1186 return PyString_FromStringAndSize("", 0);
1187 }
1188 else {
1189 source_buf = PyString_AsString((PyObject*)self);
1190 result_buf = PyMem_Malloc(slicelength);
1191
1192 for (cur = start, i = 0; i < slicelength;
1193 cur += step, i++) {
1194 result_buf[i] = source_buf[cur];
1195 }
1196
1197 result = PyString_FromStringAndSize(result_buf,
1198 slicelength);
1199 PyMem_Free(result_buf);
1200 return result;
1201 }
1202 }
1203 else {
1204 PyErr_SetString(PyExc_TypeError,
1205 "string indices must be integers");
1206 return NULL;
1207 }
1208}
1209
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001210static int
Fred Drakeba096332000-07-09 07:04:36 +00001211string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212{
1213 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001214 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001215 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216 return -1;
1217 }
1218 *ptr = (void *)self->ob_sval;
1219 return self->ob_size;
1220}
1221
1222static int
Fred Drakeba096332000-07-09 07:04:36 +00001223string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001224{
Guido van Rossum045e6881997-09-08 18:30:11 +00001225 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001226 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001227 return -1;
1228}
1229
1230static int
Fred Drakeba096332000-07-09 07:04:36 +00001231string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001232{
1233 if ( lenp )
1234 *lenp = self->ob_size;
1235 return 1;
1236}
1237
Guido van Rossum1db70701998-10-08 02:18:52 +00001238static int
Fred Drakeba096332000-07-09 07:04:36 +00001239string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001240{
1241 if ( index != 0 ) {
1242 PyErr_SetString(PyExc_SystemError,
1243 "accessing non-existent string segment");
1244 return -1;
1245 }
1246 *ptr = self->ob_sval;
1247 return self->ob_size;
1248}
1249
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001250static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001251 (inquiry)string_length, /*sq_length*/
1252 (binaryfunc)string_concat, /*sq_concat*/
1253 (intargfunc)string_repeat, /*sq_repeat*/
1254 (intargfunc)string_item, /*sq_item*/
1255 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001256 0, /*sq_ass_item*/
1257 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001258 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001259};
1260
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001261static PyMappingMethods string_as_mapping = {
1262 (inquiry)string_length,
1263 (binaryfunc)string_subscript,
1264 0,
1265};
1266
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001267static PyBufferProcs string_as_buffer = {
1268 (getreadbufferproc)string_buffer_getreadbuf,
1269 (getwritebufferproc)string_buffer_getwritebuf,
1270 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001271 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001272};
1273
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001274
1275
/* Selectors for the three strip variants; each is also the index of
   its entry in stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string with its 3-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001284
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285
1286static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001287split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 PyObject* item;
1291 PyObject *list = PyList_New(0);
1292
1293 if (list == NULL)
1294 return NULL;
1295
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 for (i = j = 0; i < len; ) {
1297 while (i < len && isspace(Py_CHARMASK(s[i])))
1298 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 while (i < len && !isspace(Py_CHARMASK(s[i])))
1301 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001303 if (maxsplit-- <= 0)
1304 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1306 if (item == NULL)
1307 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001308 err = PyList_Append(list, item);
1309 Py_DECREF(item);
1310 if (err < 0)
1311 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312 while (i < len && isspace(Py_CHARMASK(s[i])))
1313 i++;
1314 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315 }
1316 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001317 if (j < len) {
1318 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1319 if (item == NULL)
1320 goto finally;
1321 err = PyList_Append(list, item);
1322 Py_DECREF(item);
1323 if (err < 0)
1324 goto finally;
1325 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001326 return list;
1327 finally:
1328 Py_DECREF(list);
1329 return NULL;
1330}
1331
1332
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001333PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334"S.split([sep [,maxsplit]]) -> list of strings\n\
1335\n\
1336Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001337delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001338splits are done. If sep is not specified or is None, any\n\
1339whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340
1341static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001342string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343{
1344 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 int maxsplit = -1;
1346 const char *s = PyString_AS_STRING(self), *sub;
1347 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001351 if (maxsplit < 0)
1352 maxsplit = INT_MAX;
1353 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 if (PyString_Check(subobj)) {
1356 sub = PyString_AS_STRING(subobj);
1357 n = PyString_GET_SIZE(subobj);
1358 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001359#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001360 else if (PyUnicode_Check(subobj))
1361 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001362#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001363 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1364 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365 if (n == 0) {
1366 PyErr_SetString(PyExc_ValueError, "empty separator");
1367 return NULL;
1368 }
1369
1370 list = PyList_New(0);
1371 if (list == NULL)
1372 return NULL;
1373
1374 i = j = 0;
1375 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001376 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001377 if (maxsplit-- <= 0)
1378 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1380 if (item == NULL)
1381 goto fail;
1382 err = PyList_Append(list, item);
1383 Py_DECREF(item);
1384 if (err < 0)
1385 goto fail;
1386 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387 }
1388 else
1389 i++;
1390 }
1391 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1392 if (item == NULL)
1393 goto fail;
1394 err = PyList_Append(list, item);
1395 Py_DECREF(item);
1396 if (err < 0)
1397 goto fail;
1398
1399 return list;
1400
1401 fail:
1402 Py_DECREF(list);
1403 return NULL;
1404}
1405
1406
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001407PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408"S.join(sequence) -> string\n\
1409\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001410Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001411sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412
1413static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001414string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415{
1416 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001417 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 char *p;
1420 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001421 size_t sz = 0;
1422 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001423 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424
Tim Peters19fe14e2001-01-19 03:03:47 +00001425 seq = PySequence_Fast(orig, "");
1426 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001427 if (PyErr_ExceptionMatches(PyExc_TypeError))
1428 PyErr_Format(PyExc_TypeError,
1429 "sequence expected, %.80s found",
1430 orig->ob_type->tp_name);
1431 return NULL;
1432 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001433
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001434 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001435 if (seqlen == 0) {
1436 Py_DECREF(seq);
1437 return PyString_FromString("");
1438 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001440 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001441 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1442 PyErr_Format(PyExc_TypeError,
1443 "sequence item 0: expected string,"
1444 " %.80s found",
1445 item->ob_type->tp_name);
1446 Py_DECREF(seq);
1447 return NULL;
1448 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001449 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001450 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001451 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001453
Tim Peters19fe14e2001-01-19 03:03:47 +00001454 /* There are at least two things to join. Do a pre-pass to figure out
1455 * the total amount of space we'll need (sz), see whether any argument
1456 * is absurd, and defer to the Unicode join if appropriate.
1457 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001458 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001459 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001460 item = PySequence_Fast_GET_ITEM(seq, i);
1461 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001462#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001463 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001464 /* Defer to Unicode join.
1465 * CAUTION: There's no gurantee that the
1466 * original sequence can be iterated over
1467 * again, so we must pass seq here.
1468 */
1469 PyObject *result;
1470 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001471 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001472 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001473 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001474#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001475 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001476 "sequence item %i: expected string,"
1477 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001478 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001479 Py_DECREF(seq);
1480 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001481 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001482 sz += PyString_GET_SIZE(item);
1483 if (i != 0)
1484 sz += seplen;
1485 if (sz < old_sz || sz > INT_MAX) {
1486 PyErr_SetString(PyExc_OverflowError,
1487 "join() is too long for a Python string");
1488 Py_DECREF(seq);
1489 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001491 }
1492
1493 /* Allocate result space. */
1494 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1495 if (res == NULL) {
1496 Py_DECREF(seq);
1497 return NULL;
1498 }
1499
1500 /* Catenate everything. */
1501 p = PyString_AS_STRING(res);
1502 for (i = 0; i < seqlen; ++i) {
1503 size_t n;
1504 item = PySequence_Fast_GET_ITEM(seq, i);
1505 n = PyString_GET_SIZE(item);
1506 memcpy(p, PyString_AS_STRING(item), n);
1507 p += n;
1508 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001509 memcpy(p, sep, seplen);
1510 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001511 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001513
Jeremy Hylton49048292000-07-11 03:28:17 +00001514 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516}
1517
Tim Peters52e155e2001-06-16 05:42:57 +00001518PyObject *
1519_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001520{
Tim Petersa7259592001-06-16 05:11:17 +00001521 assert(sep != NULL && PyString_Check(sep));
1522 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001523 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001524}
1525
/* Normalise a (start, end) pair in place against a length `len`.
   `end` is capped at `len`; a negative `end` or `start` has `len`
   added to it and is then floored at 0.  `start` is not capped at
   `len`. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int lo = *start;
    int hi = *end;

    if (hi > len)
        hi = len;
    else if (hi < 0)
        hi += len;
    if (hi < 0)
        hi = 0;

    if (lo < 0) {
        lo += len;
        if (lo < 0)
            lo = 0;
    }

    *start = lo;
    *end = hi;
}
1540
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541static long
Fred Drakeba096332000-07-09 07:04:36 +00001542string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001543{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001544 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545 int len = PyString_GET_SIZE(self);
1546 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001549 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001550 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551 return -2;
1552 if (PyString_Check(subobj)) {
1553 sub = PyString_AS_STRING(subobj);
1554 n = PyString_GET_SIZE(subobj);
1555 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001556#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001558 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001559#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001560 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561 return -2;
1562
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001563 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 if (dir > 0) {
1566 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001568 last -= n;
1569 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001570 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 return (long)i;
1572 }
1573 else {
1574 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001575
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576 if (n == 0 && i <= last)
1577 return (long)last;
1578 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001579 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001580 return (long)j;
1581 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001582
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583 return -1;
1584}
1585
1586
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001587PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588"S.find(sub [,start [,end]]) -> int\n\
1589\n\
1590Return the lowest index in S where substring sub is found,\n\
1591such that sub is contained within s[start,end]. Optional\n\
1592arguments start and end are interpreted as in slice notation.\n\
1593\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001594Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595
1596static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001597string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001599 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600 if (result == -2)
1601 return NULL;
1602 return PyInt_FromLong(result);
1603}
1604
1605
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001606PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607"S.index(sub [,start [,end]]) -> int\n\
1608\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001609Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610
1611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001612string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 if (result == -2)
1616 return NULL;
1617 if (result == -1) {
1618 PyErr_SetString(PyExc_ValueError,
1619 "substring not found in string.index");
1620 return NULL;
1621 }
1622 return PyInt_FromLong(result);
1623}
1624
1625
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001626PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627"S.rfind(sub [,start [,end]]) -> int\n\
1628\n\
1629Return the highest index in S where substring sub is found,\n\
1630such that sub is contained within s[start,end]. Optional\n\
1631arguments start and end are interpreted as in slice notation.\n\
1632\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001633Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634
1635static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001636string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001638 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639 if (result == -2)
1640 return NULL;
1641 return PyInt_FromLong(result);
1642}
1643
1644
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001645PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646"S.rindex(sub [,start [,end]]) -> int\n\
1647\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001648Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649
1650static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001651string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001652{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001653 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 if (result == -2)
1655 return NULL;
1656 if (result == -1) {
1657 PyErr_SetString(PyExc_ValueError,
1658 "substring not found in string.rindex");
1659 return NULL;
1660 }
1661 return PyInt_FromLong(result);
1662}
1663
1664
1665static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001666do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1667{
1668 char *s = PyString_AS_STRING(self);
1669 int len = PyString_GET_SIZE(self);
1670 char *sep = PyString_AS_STRING(sepobj);
1671 int seplen = PyString_GET_SIZE(sepobj);
1672 int i, j;
1673
1674 i = 0;
1675 if (striptype != RIGHTSTRIP) {
1676 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1677 i++;
1678 }
1679 }
1680
1681 j = len;
1682 if (striptype != LEFTSTRIP) {
1683 do {
1684 j--;
1685 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1686 j++;
1687 }
1688
1689 if (i == 0 && j == len && PyString_CheckExact(self)) {
1690 Py_INCREF(self);
1691 return (PyObject*)self;
1692 }
1693 else
1694 return PyString_FromStringAndSize(s+i, j-i);
1695}
1696
1697
1698static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001699do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700{
1701 char *s = PyString_AS_STRING(self);
1702 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704 i = 0;
1705 if (striptype != RIGHTSTRIP) {
1706 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1707 i++;
1708 }
1709 }
1710
1711 j = len;
1712 if (striptype != LEFTSTRIP) {
1713 do {
1714 j--;
1715 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1716 j++;
1717 }
1718
Tim Peters8fa5dd02001-09-12 02:18:30 +00001719 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 Py_INCREF(self);
1721 return (PyObject*)self;
1722 }
1723 else
1724 return PyString_FromStringAndSize(s+i, j-i);
1725}
1726
1727
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001728static PyObject *
1729do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1730{
1731 PyObject *sep = NULL;
1732
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001733 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001734 return NULL;
1735
1736 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001737 if (PyString_Check(sep))
1738 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001739#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001740 else if (PyUnicode_Check(sep)) {
1741 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1742 PyObject *res;
1743 if (uniself==NULL)
1744 return NULL;
1745 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1746 striptype, sep);
1747 Py_DECREF(uniself);
1748 return res;
1749 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001750#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001751 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001752 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001753#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001754 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001755#else
1756 "%s arg must be None or str",
1757#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001758 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001759 return NULL;
1760 }
1761 return do_xstrip(self, striptype, sep);
1762 }
1763
1764 return do_strip(self, striptype);
1765}
1766
1767
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001768PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001769"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770\n\
1771Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001772whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001773If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001774If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775
1776static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001777string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001779 if (PyTuple_GET_SIZE(args) == 0)
1780 return do_strip(self, BOTHSTRIP); /* Common case */
1781 else
1782 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783}
1784
1785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001787"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001789Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001790If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001791If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792
1793static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001794string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001796 if (PyTuple_GET_SIZE(args) == 0)
1797 return do_strip(self, LEFTSTRIP); /* Common case */
1798 else
1799 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800}
1801
1802
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001803PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001804"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001806Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001807If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001808If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809
1810static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001811string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001813 if (PyTuple_GET_SIZE(args) == 0)
1814 return do_strip(self, RIGHTSTRIP); /* Common case */
1815 else
1816 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817}
1818
1819
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001820PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821"S.lower() -> string\n\
1822\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001823Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824
1825static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001826string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827{
1828 char *s = PyString_AS_STRING(self), *s_new;
1829 int i, n = PyString_GET_SIZE(self);
1830 PyObject *new;
1831
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832 new = PyString_FromStringAndSize(NULL, n);
1833 if (new == NULL)
1834 return NULL;
1835 s_new = PyString_AsString(new);
1836 for (i = 0; i < n; i++) {
1837 int c = Py_CHARMASK(*s++);
1838 if (isupper(c)) {
1839 *s_new = tolower(c);
1840 } else
1841 *s_new = c;
1842 s_new++;
1843 }
1844 return new;
1845}
1846
1847
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001848PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849"S.upper() -> string\n\
1850\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001851Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852
1853static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001854string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855{
1856 char *s = PyString_AS_STRING(self), *s_new;
1857 int i, n = PyString_GET_SIZE(self);
1858 PyObject *new;
1859
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860 new = PyString_FromStringAndSize(NULL, n);
1861 if (new == NULL)
1862 return NULL;
1863 s_new = PyString_AsString(new);
1864 for (i = 0; i < n; i++) {
1865 int c = Py_CHARMASK(*s++);
1866 if (islower(c)) {
1867 *s_new = toupper(c);
1868 } else
1869 *s_new = c;
1870 s_new++;
1871 }
1872 return new;
1873}
1874
1875
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001876PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877"S.title() -> string\n\
1878\n\
1879Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001880characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881
1882static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001883string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001884{
1885 char *s = PyString_AS_STRING(self), *s_new;
1886 int i, n = PyString_GET_SIZE(self);
1887 int previous_is_cased = 0;
1888 PyObject *new;
1889
Guido van Rossum4c08d552000-03-10 22:55:18 +00001890 new = PyString_FromStringAndSize(NULL, n);
1891 if (new == NULL)
1892 return NULL;
1893 s_new = PyString_AsString(new);
1894 for (i = 0; i < n; i++) {
1895 int c = Py_CHARMASK(*s++);
1896 if (islower(c)) {
1897 if (!previous_is_cased)
1898 c = toupper(c);
1899 previous_is_cased = 1;
1900 } else if (isupper(c)) {
1901 if (previous_is_cased)
1902 c = tolower(c);
1903 previous_is_cased = 1;
1904 } else
1905 previous_is_cased = 0;
1906 *s_new++ = c;
1907 }
1908 return new;
1909}
1910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912"S.capitalize() -> string\n\
1913\n\
1914Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001915capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916
1917static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001918string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919{
1920 char *s = PyString_AS_STRING(self), *s_new;
1921 int i, n = PyString_GET_SIZE(self);
1922 PyObject *new;
1923
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 new = PyString_FromStringAndSize(NULL, n);
1925 if (new == NULL)
1926 return NULL;
1927 s_new = PyString_AsString(new);
1928 if (0 < n) {
1929 int c = Py_CHARMASK(*s++);
1930 if (islower(c))
1931 *s_new = toupper(c);
1932 else
1933 *s_new = c;
1934 s_new++;
1935 }
1936 for (i = 1; i < n; i++) {
1937 int c = Py_CHARMASK(*s++);
1938 if (isupper(c))
1939 *s_new = tolower(c);
1940 else
1941 *s_new = c;
1942 s_new++;
1943 }
1944 return new;
1945}
1946
1947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001948PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949"S.count(sub[, start[, end]]) -> int\n\
1950\n\
1951Return the number of occurrences of substring sub in string\n\
1952S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001953interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954
1955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001956string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001958 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 int len = PyString_GET_SIZE(self), n;
1960 int i = 0, last = INT_MAX;
1961 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963
Guido van Rossumc6821402000-05-08 14:08:05 +00001964 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1965 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001967
Guido van Rossum4c08d552000-03-10 22:55:18 +00001968 if (PyString_Check(subobj)) {
1969 sub = PyString_AS_STRING(subobj);
1970 n = PyString_GET_SIZE(subobj);
1971 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001972#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001973 else if (PyUnicode_Check(subobj)) {
1974 int count;
1975 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1976 if (count == -1)
1977 return NULL;
1978 else
1979 return PyInt_FromLong((long) count);
1980 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001981#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001982 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1983 return NULL;
1984
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001985 string_adjust_indices(&i, &last, len);
1986
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987 m = last + 1 - n;
1988 if (n == 0)
1989 return PyInt_FromLong((long) (m-i));
1990
1991 r = 0;
1992 while (i < m) {
1993 if (!memcmp(s+i, sub, n)) {
1994 r++;
1995 i += n;
1996 } else {
1997 i++;
1998 }
1999 }
2000 return PyInt_FromLong((long) r);
2001}
2002
2003
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002004PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005"S.swapcase() -> string\n\
2006\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002007Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002008converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009
2010static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002011string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012{
2013 char *s = PyString_AS_STRING(self), *s_new;
2014 int i, n = PyString_GET_SIZE(self);
2015 PyObject *new;
2016
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017 new = PyString_FromStringAndSize(NULL, n);
2018 if (new == NULL)
2019 return NULL;
2020 s_new = PyString_AsString(new);
2021 for (i = 0; i < n; i++) {
2022 int c = Py_CHARMASK(*s++);
2023 if (islower(c)) {
2024 *s_new = toupper(c);
2025 }
2026 else if (isupper(c)) {
2027 *s_new = tolower(c);
2028 }
2029 else
2030 *s_new = c;
2031 s_new++;
2032 }
2033 return new;
2034}
2035
2036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002037PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038"S.translate(table [,deletechars]) -> string\n\
2039\n\
2040Return a copy of the string S, where all characters occurring\n\
2041in the optional argument deletechars are removed, and the\n\
2042remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002043translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044
2045static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002046string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048 register char *input, *output;
2049 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002050 register int i, c, changed = 0;
2051 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053 int inlen, tablen, dellen = 0;
2054 PyObject *result;
2055 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002058 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002059 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061
2062 if (PyString_Check(tableobj)) {
2063 table1 = PyString_AS_STRING(tableobj);
2064 tablen = PyString_GET_SIZE(tableobj);
2065 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002066#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002068 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069 parameter; instead a mapping to None will cause characters
2070 to be deleted. */
2071 if (delobj != NULL) {
2072 PyErr_SetString(PyExc_TypeError,
2073 "deletions are implemented differently for unicode");
2074 return NULL;
2075 }
2076 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2077 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002078#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002079 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002081
Martin v. Löwis00b61272002-12-12 20:03:19 +00002082 if (tablen != 256) {
2083 PyErr_SetString(PyExc_ValueError,
2084 "translation table must be 256 characters long");
2085 return NULL;
2086 }
2087
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 if (delobj != NULL) {
2089 if (PyString_Check(delobj)) {
2090 del_table = PyString_AS_STRING(delobj);
2091 dellen = PyString_GET_SIZE(delobj);
2092 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002093#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094 else if (PyUnicode_Check(delobj)) {
2095 PyErr_SetString(PyExc_TypeError,
2096 "deletions are implemented differently for unicode");
2097 return NULL;
2098 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002099#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002100 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2101 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002102 }
2103 else {
2104 del_table = NULL;
2105 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106 }
2107
2108 table = table1;
2109 inlen = PyString_Size(input_obj);
2110 result = PyString_FromStringAndSize((char *)NULL, inlen);
2111 if (result == NULL)
2112 return NULL;
2113 output_start = output = PyString_AsString(result);
2114 input = PyString_AsString(input_obj);
2115
2116 if (dellen == 0) {
2117 /* If no deletions are required, use faster code */
2118 for (i = inlen; --i >= 0; ) {
2119 c = Py_CHARMASK(*input++);
2120 if (Py_CHARMASK((*output++ = table[c])) != c)
2121 changed = 1;
2122 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002123 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124 return result;
2125 Py_DECREF(result);
2126 Py_INCREF(input_obj);
2127 return input_obj;
2128 }
2129
2130 for (i = 0; i < 256; i++)
2131 trans_table[i] = Py_CHARMASK(table[i]);
2132
2133 for (i = 0; i < dellen; i++)
2134 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2135
2136 for (i = inlen; --i >= 0; ) {
2137 c = Py_CHARMASK(*input++);
2138 if (trans_table[c] != -1)
2139 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2140 continue;
2141 changed = 1;
2142 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002143 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002144 Py_DECREF(result);
2145 Py_INCREF(input_obj);
2146 return input_obj;
2147 }
2148 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002149 if (inlen > 0)
2150 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 return result;
2152}
2153
2154
2155/* What follows is used for implementing replace(). Perry Stoll. */
2156
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the offset into MEM of the match, or -1 if
  there is none.  A pattern longer than MEM can never match, so -1 is
  returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin within the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
2182
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from the
  left and resuming right after each match.
    mem=1111  and pat=11 gives 2
    mem=11111 and pat=11 also gives 2
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    for (; len >= 0; hits++) {
        int consumed;
        int at = mymemfind(mem, len, pat, pat_len);

        if (at == -1)
            break;

        /* Step over the unmatched prefix and the match itself. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
2206
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  COUNT limits the number of replacements; a
   negative COUNT means "replace all".  An empty PAT matches before every
   byte of STR and once more at its end.

   The original string is returned (no allocation) when STR is empty,
   when PAT and SUB are both empty, when the length of PAT is greater
   than the length of STR, or when there is nothing to replace (no
   occurrences of PAT, or COUNT is 0).  Otherwise a new buffer is
   allocated here with PyMem_MALLOC and returned; the caller must
   release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Guard the length computation below against signed overflow.  The
       result only grows when SUB is longer than PAT; in that case
       len + nfound*(sub_len - pat_len) must not exceed INT_MAX.  An
       overflow would otherwise yield a too-small allocation that the
       memcpy() calls below would overrun.  Treat it like an allocation
       failure. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* Empty pattern: emit SUB before each input byte until the
               replacement budget is used up, then copy the rest of the
               input verbatim. */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2304
2305
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002306PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307"S.replace (old, new[, maxsplit]) -> string\n\
2308\n\
2309Return a copy of string S with all occurrences of substring\n\
2310old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002311given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312
2313static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002314string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002315{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002316 const char *str = PyString_AS_STRING(self), *sub, *repl;
2317 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002318 const int len = PyString_GET_SIZE(self);
2319 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 if (!PyArg_ParseTuple(args, "OO|i:replace",
2325 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327
2328 if (PyString_Check(subobj)) {
2329 sub = PyString_AS_STRING(subobj);
2330 sub_len = PyString_GET_SIZE(subobj);
2331 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002332#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002334 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002335 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002336#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2338 return NULL;
2339
2340 if (PyString_Check(replobj)) {
2341 repl = PyString_AS_STRING(replobj);
2342 repl_len = PyString_GET_SIZE(replobj);
2343 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002344#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002346 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002347 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002348#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2350 return NULL;
2351
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353 if (new_s == NULL) {
2354 PyErr_NoMemory();
2355 return NULL;
2356 }
2357 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002358 if (PyString_CheckExact(self)) {
2359 /* we're returning another reference to self */
2360 new = (PyObject*)self;
2361 Py_INCREF(new);
2362 }
2363 else {
2364 new = PyString_FromStringAndSize(str, len);
2365 if (new == NULL)
2366 return NULL;
2367 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368 }
2369 else {
2370 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002371 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 }
2373 return new;
2374}
2375
2376
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002377PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002378"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002380Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002382comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383
2384static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002385string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002389 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390 int plen;
2391 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002392 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394
Guido van Rossumc6821402000-05-08 14:08:05 +00002395 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2396 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 return NULL;
2398 if (PyString_Check(subobj)) {
2399 prefix = PyString_AS_STRING(subobj);
2400 plen = PyString_GET_SIZE(subobj);
2401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002402#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002403 else if (PyUnicode_Check(subobj)) {
2404 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002405 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002406 subobj, start, end, -1);
2407 if (rc == -1)
2408 return NULL;
2409 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002410 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002411 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002412#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002413 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414 return NULL;
2415
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002416 string_adjust_indices(&start, &end, len);
2417
2418 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002419 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002421 if (end-start >= plen)
2422 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2423 else
2424 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425}
2426
2427
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002428PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002429"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002431Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002433comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434
2435static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002436string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002438 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002440 const char* suffix;
2441 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002442 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002443 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002445
Guido van Rossumc6821402000-05-08 14:08:05 +00002446 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2447 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 return NULL;
2449 if (PyString_Check(subobj)) {
2450 suffix = PyString_AS_STRING(subobj);
2451 slen = PyString_GET_SIZE(subobj);
2452 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002453#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002454 else if (PyUnicode_Check(subobj)) {
2455 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002456 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002457 subobj, start, end, +1);
2458 if (rc == -1)
2459 return NULL;
2460 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002461 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002462 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002463#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002464 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002465 return NULL;
2466
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002467 string_adjust_indices(&start, &end, len);
2468
2469 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002470 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002471
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002472 if (end-slen > start)
2473 start = end - slen;
2474 if (end-start >= slen)
2475 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2476 else
2477 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478}
2479
2480
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002481PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002482"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002483\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002484Encodes S using the codec registered for encoding. encoding defaults\n\
2485to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002486handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002487a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2488'xmlcharrefreplace' as well as any other name registered with\n\
2489codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002490
2491static PyObject *
2492string_encode(PyStringObject *self, PyObject *args)
2493{
2494 char *encoding = NULL;
2495 char *errors = NULL;
2496 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2497 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002498 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2499}
2500
2501
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002502PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002503"S.decode([encoding[,errors]]) -> object\n\
2504\n\
2505Decodes S using the codec registered for encoding. encoding defaults\n\
2506to the default encoding. errors may be given to set a different error\n\
2507handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002508a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2509as well as any other name registerd with codecs.register_error that is\n\
2510able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002511
2512static PyObject *
2513string_decode(PyStringObject *self, PyObject *args)
2514{
2515 char *encoding = NULL;
2516 char *errors = NULL;
2517 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2518 return NULL;
2519 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002520}
2521
2522
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002523PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002524"S.expandtabs([tabsize]) -> string\n\
2525\n\
2526Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002527If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528
2529static PyObject*
2530string_expandtabs(PyStringObject *self, PyObject *args)
2531{
2532 const char *e, *p;
2533 char *q;
2534 int i, j;
2535 PyObject *u;
2536 int tabsize = 8;
2537
2538 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2539 return NULL;
2540
Thomas Wouters7e474022000-07-16 12:04:32 +00002541 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 i = j = 0;
2543 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2544 for (p = PyString_AS_STRING(self); p < e; p++)
2545 if (*p == '\t') {
2546 if (tabsize > 0)
2547 j += tabsize - (j % tabsize);
2548 }
2549 else {
2550 j++;
2551 if (*p == '\n' || *p == '\r') {
2552 i += j;
2553 j = 0;
2554 }
2555 }
2556
2557 /* Second pass: create output string and fill it */
2558 u = PyString_FromStringAndSize(NULL, i + j);
2559 if (!u)
2560 return NULL;
2561
2562 j = 0;
2563 q = PyString_AS_STRING(u);
2564
2565 for (p = PyString_AS_STRING(self); p < e; p++)
2566 if (*p == '\t') {
2567 if (tabsize > 0) {
2568 i = tabsize - (j % tabsize);
2569 j += i;
2570 while (i--)
2571 *q++ = ' ';
2572 }
2573 }
2574 else {
2575 j++;
2576 *q++ = *p;
2577 if (*p == '\n' || *p == '\r')
2578 j = 0;
2579 }
2580
2581 return u;
2582}
2583
Tim Peters8fa5dd02001-09-12 02:18:30 +00002584static PyObject *
2585pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586{
2587 PyObject *u;
2588
2589 if (left < 0)
2590 left = 0;
2591 if (right < 0)
2592 right = 0;
2593
Tim Peters8fa5dd02001-09-12 02:18:30 +00002594 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595 Py_INCREF(self);
2596 return (PyObject *)self;
2597 }
2598
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002599 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002600 left + PyString_GET_SIZE(self) + right);
2601 if (u) {
2602 if (left)
2603 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002604 memcpy(PyString_AS_STRING(u) + left,
2605 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606 PyString_GET_SIZE(self));
2607 if (right)
2608 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2609 fill, right);
2610 }
2611
2612 return u;
2613}
2614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002615PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002616"S.ljust(width) -> string\n"
2617"\n"
2618"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002619"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620
2621static PyObject *
2622string_ljust(PyStringObject *self, PyObject *args)
2623{
2624 int width;
2625 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2626 return NULL;
2627
Tim Peters8fa5dd02001-09-12 02:18:30 +00002628 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002629 Py_INCREF(self);
2630 return (PyObject*) self;
2631 }
2632
2633 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2634}
2635
2636
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002637PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002638"S.rjust(width) -> string\n"
2639"\n"
2640"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002641"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642
2643static PyObject *
2644string_rjust(PyStringObject *self, PyObject *args)
2645{
2646 int width;
2647 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2648 return NULL;
2649
Tim Peters8fa5dd02001-09-12 02:18:30 +00002650 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002651 Py_INCREF(self);
2652 return (PyObject*) self;
2653 }
2654
2655 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2656}
2657
2658
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002659PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002660"S.center(width) -> string\n"
2661"\n"
2662"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002663"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002664
2665static PyObject *
2666string_center(PyStringObject *self, PyObject *args)
2667{
2668 int marg, left;
2669 int width;
2670
2671 if (!PyArg_ParseTuple(args, "i:center", &width))
2672 return NULL;
2673
Tim Peters8fa5dd02001-09-12 02:18:30 +00002674 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002675 Py_INCREF(self);
2676 return (PyObject*) self;
2677 }
2678
2679 marg = width - PyString_GET_SIZE(self);
2680 left = marg / 2 + (marg & width & 1);
2681
2682 return pad(self, left, marg - left, ' ');
2683}
2684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002685PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002686"S.zfill(width) -> string\n"
2687"\n"
2688"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002689"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002690
2691static PyObject *
2692string_zfill(PyStringObject *self, PyObject *args)
2693{
2694 int fill;
2695 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002696 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002697
2698 int width;
2699 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2700 return NULL;
2701
2702 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002703 if (PyString_CheckExact(self)) {
2704 Py_INCREF(self);
2705 return (PyObject*) self;
2706 }
2707 else
2708 return PyString_FromStringAndSize(
2709 PyString_AS_STRING(self),
2710 PyString_GET_SIZE(self)
2711 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002712 }
2713
2714 fill = width - PyString_GET_SIZE(self);
2715
2716 s = pad(self, fill, 0, '0');
2717
2718 if (s == NULL)
2719 return NULL;
2720
2721 p = PyString_AS_STRING(s);
2722 if (p[fill] == '+' || p[fill] == '-') {
2723 /* move sign to beginning of string */
2724 p[0] = p[fill];
2725 p[fill] = '0';
2726 }
2727
2728 return (PyObject*) s;
2729}
2730
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002731PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002732"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002733"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002734"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002736
2737static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002738string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002739{
Fred Drakeba096332000-07-09 07:04:36 +00002740 register const unsigned char *p
2741 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002742 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002743
Guido van Rossum4c08d552000-03-10 22:55:18 +00002744 /* Shortcut for single character strings */
2745 if (PyString_GET_SIZE(self) == 1 &&
2746 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002747 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002748
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002749 /* Special case for empty strings */
2750 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002751 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002752
Guido van Rossum4c08d552000-03-10 22:55:18 +00002753 e = p + PyString_GET_SIZE(self);
2754 for (; p < e; p++) {
2755 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002756 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002757 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002758 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002759}
2760
2761
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002762PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002763"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002764\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002765Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002766and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002767
2768static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002769string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002770{
Fred Drakeba096332000-07-09 07:04:36 +00002771 register const unsigned char *p
2772 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002773 register const unsigned char *e;
2774
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002775 /* Shortcut for single character strings */
2776 if (PyString_GET_SIZE(self) == 1 &&
2777 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002778 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002779
2780 /* Special case for empty strings */
2781 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002782 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002783
2784 e = p + PyString_GET_SIZE(self);
2785 for (; p < e; p++) {
2786 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002787 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002788 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002789 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002790}
2791
2792
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002793PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002794"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002795\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002796Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002797and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002798
2799static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002800string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002801{
Fred Drakeba096332000-07-09 07:04:36 +00002802 register const unsigned char *p
2803 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002804 register const unsigned char *e;
2805
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002806 /* Shortcut for single character strings */
2807 if (PyString_GET_SIZE(self) == 1 &&
2808 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002809 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002810
2811 /* Special case for empty strings */
2812 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002813 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002814
2815 e = p + PyString_GET_SIZE(self);
2816 for (; p < e; p++) {
2817 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002818 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002819 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002820 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002821}
2822
2823
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002824PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002825"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002826\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002827Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002828False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002829
2830static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002831string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002832{
Fred Drakeba096332000-07-09 07:04:36 +00002833 register const unsigned char *p
2834 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002835 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836
Guido van Rossum4c08d552000-03-10 22:55:18 +00002837 /* Shortcut for single character strings */
2838 if (PyString_GET_SIZE(self) == 1 &&
2839 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002840 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002842 /* Special case for empty strings */
2843 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002844 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002845
Guido van Rossum4c08d552000-03-10 22:55:18 +00002846 e = p + PyString_GET_SIZE(self);
2847 for (; p < e; p++) {
2848 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002849 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002850 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002851 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002852}
2853
2854
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002855PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002856"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002857\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002858Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002859at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002860
2861static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002862string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002863{
Fred Drakeba096332000-07-09 07:04:36 +00002864 register const unsigned char *p
2865 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002866 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867 int cased;
2868
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869 /* Shortcut for single character strings */
2870 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002871 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002873 /* Special case for empty strings */
2874 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002875 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002876
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877 e = p + PyString_GET_SIZE(self);
2878 cased = 0;
2879 for (; p < e; p++) {
2880 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002881 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882 else if (!cased && islower(*p))
2883 cased = 1;
2884 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002885 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886}
2887
2888
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002889PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002890"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002891\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002892Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002893at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002894
2895static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002896string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002897{
Fred Drakeba096332000-07-09 07:04:36 +00002898 register const unsigned char *p
2899 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002900 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901 int cased;
2902
Guido van Rossum4c08d552000-03-10 22:55:18 +00002903 /* Shortcut for single character strings */
2904 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002905 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002906
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002907 /* Special case for empty strings */
2908 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002909 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002910
Guido van Rossum4c08d552000-03-10 22:55:18 +00002911 e = p + PyString_GET_SIZE(self);
2912 cased = 0;
2913 for (; p < e; p++) {
2914 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002915 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916 else if (!cased && isupper(*p))
2917 cased = 1;
2918 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002919 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920}
2921
2922
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002923PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002924"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002925\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002926Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002927may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002928ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929
2930static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002931string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002932{
Fred Drakeba096332000-07-09 07:04:36 +00002933 register const unsigned char *p
2934 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002935 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002936 int cased, previous_is_cased;
2937
Guido van Rossum4c08d552000-03-10 22:55:18 +00002938 /* Shortcut for single character strings */
2939 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002940 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002941
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002942 /* Special case for empty strings */
2943 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002944 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002945
Guido van Rossum4c08d552000-03-10 22:55:18 +00002946 e = p + PyString_GET_SIZE(self);
2947 cased = 0;
2948 previous_is_cased = 0;
2949 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002950 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002951
2952 if (isupper(ch)) {
2953 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002954 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002955 previous_is_cased = 1;
2956 cased = 1;
2957 }
2958 else if (islower(ch)) {
2959 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002960 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961 previous_is_cased = 1;
2962 cased = 1;
2963 }
2964 else
2965 previous_is_cased = 0;
2966 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002967 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002968}
2969
2970
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002971PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002972"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002973\n\
2974Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002975Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002976is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002977
2978#define SPLIT_APPEND(data, left, right) \
2979 str = PyString_FromStringAndSize(data + left, right - left); \
2980 if (!str) \
2981 goto onError; \
2982 if (PyList_Append(list, str)) { \
2983 Py_DECREF(str); \
2984 goto onError; \
2985 } \
2986 else \
2987 Py_DECREF(str);
2988
2989static PyObject*
2990string_splitlines(PyStringObject *self, PyObject *args)
2991{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002992 register int i;
2993 register int j;
2994 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002995 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002996 PyObject *list;
2997 PyObject *str;
2998 char *data;
2999
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003000 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003001 return NULL;
3002
3003 data = PyString_AS_STRING(self);
3004 len = PyString_GET_SIZE(self);
3005
Guido van Rossum4c08d552000-03-10 22:55:18 +00003006 list = PyList_New(0);
3007 if (!list)
3008 goto onError;
3009
3010 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003011 int eol;
3012
Guido van Rossum4c08d552000-03-10 22:55:18 +00003013 /* Find a line and append it */
3014 while (i < len && data[i] != '\n' && data[i] != '\r')
3015 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003016
3017 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003018 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003019 if (i < len) {
3020 if (data[i] == '\r' && i + 1 < len &&
3021 data[i+1] == '\n')
3022 i += 2;
3023 else
3024 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003025 if (keepends)
3026 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003027 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003028 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003029 j = i;
3030 }
3031 if (j < len) {
3032 SPLIT_APPEND(data, j, len);
3033 }
3034
3035 return list;
3036
3037 onError:
3038 Py_DECREF(list);
3039 return NULL;
3040}
3041
3042#undef SPLIT_APPEND
3043
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003045static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003046string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003047 /* Counterparts of the obsolete stropmodule functions; except
3048 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003049 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3050 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3051 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3052 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003053 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3054 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3055 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3056 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3057 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3058 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3059 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003060 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3061 capitalize__doc__},
3062 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3063 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3064 endswith__doc__},
3065 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3066 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3067 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3068 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3069 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3070 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3071 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3072 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3073 startswith__doc__},
3074 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3075 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3076 swapcase__doc__},
3077 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3078 translate__doc__},
3079 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3080 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3081 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3082 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3083 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3084 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3085 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3086 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3087 expandtabs__doc__},
3088 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3089 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003090 {NULL, NULL} /* sentinel */
3091};
3092
Jeremy Hylton938ace62002-07-17 16:30:39 +00003093static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003094str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3095
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003096static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003097string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003098{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003099 PyObject *x = NULL;
3100 static char *kwlist[] = {"object", 0};
3101
Guido van Rossumae960af2001-08-30 03:11:59 +00003102 if (type != &PyString_Type)
3103 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003104 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3105 return NULL;
3106 if (x == NULL)
3107 return PyString_FromString("");
3108 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003109}
3110
Guido van Rossumae960af2001-08-30 03:11:59 +00003111static PyObject *
3112str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3113{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003114 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003115 int n;
3116
3117 assert(PyType_IsSubtype(type, &PyString_Type));
3118 tmp = string_new(&PyString_Type, args, kwds);
3119 if (tmp == NULL)
3120 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003121 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003122 n = PyString_GET_SIZE(tmp);
3123 pnew = type->tp_alloc(type, n);
3124 if (pnew != NULL) {
3125 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003126 ((PyStringObject *)pnew)->ob_shash =
3127 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003128 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003129 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003130 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003131 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003132}
3133
Guido van Rossumcacfc072002-05-24 19:01:59 +00003134static PyObject *
3135basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3136{
3137 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003138 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003139 return NULL;
3140}
3141
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003142static PyObject *
3143string_mod(PyObject *v, PyObject *w)
3144{
3145 if (!PyString_Check(v)) {
3146 Py_INCREF(Py_NotImplemented);
3147 return Py_NotImplemented;
3148 }
3149 return PyString_Format(v, w);
3150}
3151
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003152PyDoc_STRVAR(basestring_doc,
3153"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003154
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003155static PyNumberMethods string_as_number = {
3156 0, /*nb_add*/
3157 0, /*nb_subtract*/
3158 0, /*nb_multiply*/
3159 0, /*nb_divide*/
3160 string_mod, /*nb_remainder*/
3161};
3162
3163
Guido van Rossumcacfc072002-05-24 19:01:59 +00003164PyTypeObject PyBaseString_Type = {
3165 PyObject_HEAD_INIT(&PyType_Type)
3166 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003167 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003168 0,
3169 0,
3170 0, /* tp_dealloc */
3171 0, /* tp_print */
3172 0, /* tp_getattr */
3173 0, /* tp_setattr */
3174 0, /* tp_compare */
3175 0, /* tp_repr */
3176 0, /* tp_as_number */
3177 0, /* tp_as_sequence */
3178 0, /* tp_as_mapping */
3179 0, /* tp_hash */
3180 0, /* tp_call */
3181 0, /* tp_str */
3182 0, /* tp_getattro */
3183 0, /* tp_setattro */
3184 0, /* tp_as_buffer */
3185 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3186 basestring_doc, /* tp_doc */
3187 0, /* tp_traverse */
3188 0, /* tp_clear */
3189 0, /* tp_richcompare */
3190 0, /* tp_weaklistoffset */
3191 0, /* tp_iter */
3192 0, /* tp_iternext */
3193 0, /* tp_methods */
3194 0, /* tp_members */
3195 0, /* tp_getset */
3196 &PyBaseObject_Type, /* tp_base */
3197 0, /* tp_dict */
3198 0, /* tp_descr_get */
3199 0, /* tp_descr_set */
3200 0, /* tp_dictoffset */
3201 0, /* tp_init */
3202 0, /* tp_alloc */
3203 basestring_new, /* tp_new */
3204 0, /* tp_free */
3205};
3206
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003207PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003208"str(object) -> string\n\
3209\n\
3210Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003211If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003212
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003213PyTypeObject PyString_Type = {
3214 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003215 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003216 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003217 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003218 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003219 (destructor)string_dealloc, /* tp_dealloc */
3220 (printfunc)string_print, /* tp_print */
3221 0, /* tp_getattr */
3222 0, /* tp_setattr */
3223 0, /* tp_compare */
3224 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003225 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003226 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003227 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003228 (hashfunc)string_hash, /* tp_hash */
3229 0, /* tp_call */
3230 (reprfunc)string_str, /* tp_str */
3231 PyObject_GenericGetAttr, /* tp_getattro */
3232 0, /* tp_setattro */
3233 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003234 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3235 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003236 string_doc, /* tp_doc */
3237 0, /* tp_traverse */
3238 0, /* tp_clear */
3239 (richcmpfunc)string_richcompare, /* tp_richcompare */
3240 0, /* tp_weaklistoffset */
3241 0, /* tp_iter */
3242 0, /* tp_iternext */
3243 string_methods, /* tp_methods */
3244 0, /* tp_members */
3245 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003246 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003247 0, /* tp_dict */
3248 0, /* tp_descr_get */
3249 0, /* tp_descr_set */
3250 0, /* tp_dictoffset */
3251 0, /* tp_init */
3252 0, /* tp_alloc */
3253 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003254 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003255};
3256
3257void
Fred Drakeba096332000-07-09 07:04:36 +00003258PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003259{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003260 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003261 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003262 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003263 if (w == NULL || !PyString_Check(*pv)) {
3264 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003265 *pv = NULL;
3266 return;
3267 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003268 v = string_concat((PyStringObject *) *pv, w);
3269 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003270 *pv = v;
3271}
3272
Guido van Rossum013142a1994-08-30 08:19:36 +00003273void
Fred Drakeba096332000-07-09 07:04:36 +00003274PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003275{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003276 PyString_Concat(pv, w);
3277 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003278}
3279
3280
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003281/* The following function breaks the notion that strings are immutable:
3282 it changes the size of a string. We get away with this only if there
3283 is only one module referencing the object. You can also think of it
3284 as creating a new string object and destroying the old one, only
3285 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003286 already be known to some other part of the code...
3287 Note that if there's not enough memory to resize the string, the original
3288 string object at *pv is deallocated, *pv is set to NULL, an "out of
3289 memory" exception is set, and -1 is returned. Else (on success) 0 is
3290 returned, and the value in *pv may or may not be the same as on input.
3291 As always, an extra byte is allocated for a trailing \0 byte (newsize
3292 does *not* include that), and a trailing \0 byte is stored.
3293*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003294
3295int
Fred Drakeba096332000-07-09 07:04:36 +00003296_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003297{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003298 register PyObject *v;
3299 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003300 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003301 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003302 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 Py_DECREF(v);
3304 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003305 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003306 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003307 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003308 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003309 _Py_ForgetReference(v);
3310 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003311 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003312 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003313 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003314 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003315 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003316 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003317 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003318 _Py_NewReference(*pv);
3319 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003320 sv->ob_size = newsize;
3321 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003322 return 0;
3323}
Guido van Rossume5372401993-03-16 12:15:04 +00003324
3325/* Helpers for formatstring */
3326
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003327static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003328getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003329{
3330 int argidx = *p_argidx;
3331 if (argidx < arglen) {
3332 (*p_argidx)++;
3333 if (arglen < 0)
3334 return args;
3335 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003336 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003337 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003338 PyErr_SetString(PyExc_TypeError,
3339 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003340 return NULL;
3341}
3342
/* Bit flags recording which flag characters appeared in a format
 * specifier:
 *   '-'  ->  F_LJUST
 *   '+'  ->  F_SIGN
 *   ' '  ->  F_BLANK
 *   '#'  ->  F_ALT
 *   '0'  ->  F_ZERO
 */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)
3355
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003356static int
Fred Drakeba096332000-07-09 07:04:36 +00003357formatfloat(char *buf, size_t buflen, int flags,
3358 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003359{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003360 /* fmt = '%#.' + `prec` + `type`
3361 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003362 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003363 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003364 x = PyFloat_AsDouble(v);
3365 if (x == -1.0 && PyErr_Occurred()) {
3366 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003367 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003368 }
Guido van Rossume5372401993-03-16 12:15:04 +00003369 if (prec < 0)
3370 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003371 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3372 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003373 /* Worst case length calc to ensure no buffer overrun:
3374
3375 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003376 fmt = %#.<prec>g
3377 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003378 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003379 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003380
3381 'f' formats:
3382 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3383 len = 1 + 50 + 1 + prec = 52 + prec
3384
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003385 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003386 always given), therefore increase the length by one.
3387
3388 */
3389 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3390 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003391 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003392 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003393 return -1;
3394 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003395 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3396 (flags&F_ALT) ? "#" : "",
3397 prec, type);
Tim Peters885d4572001-11-28 20:27:42 +00003398 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003399 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003400}
3401
Tim Peters38fd5b62000-09-21 05:43:11 +00003402/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3403 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3404 * Python's regular ints.
3405 * Return value: a new PyString*, or NULL if error.
3406 * . *pbuf is set to point into it,
3407 * *plen set to the # of chars following that.
3408 * Caller must decref it when done using pbuf.
3409 * The string starting at *pbuf is of the form
3410 * "-"? ("0x" | "0X")? digit+
3411 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003412 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003413 * There will be at least prec digits, zero-filled on the left if
3414 * necessary to get that many.
3415 * val object to be converted
3416 * flags bitmask of format flags; only F_ALT is looked at
3417 * prec minimum number of digits; 0-fill on left if needed
3418 * type a character in [duoxX]; u acts the same as d
3419 *
3420 * CAUTION: o, x and X conversions on regular ints can never
3421 * produce a '-' sign, but can for Python's unbounded ints.
3422 */
3423PyObject*
3424_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3425 char **pbuf, int *plen)
3426{
3427 PyObject *result = NULL;
3428 char *buf;
3429 int i;
3430 int sign; /* 1 if '-', else 0 */
3431 int len; /* number of characters */
3432 int numdigits; /* len == numnondigits + numdigits */
3433 int numnondigits = 0;
3434
3435 switch (type) {
3436 case 'd':
3437 case 'u':
3438 result = val->ob_type->tp_str(val);
3439 break;
3440 case 'o':
3441 result = val->ob_type->tp_as_number->nb_oct(val);
3442 break;
3443 case 'x':
3444 case 'X':
3445 numnondigits = 2;
3446 result = val->ob_type->tp_as_number->nb_hex(val);
3447 break;
3448 default:
3449 assert(!"'type' not in [duoxX]");
3450 }
3451 if (!result)
3452 return NULL;
3453
3454 /* To modify the string in-place, there can only be one reference. */
3455 if (result->ob_refcnt != 1) {
3456 PyErr_BadInternalCall();
3457 return NULL;
3458 }
3459 buf = PyString_AsString(result);
3460 len = PyString_Size(result);
3461 if (buf[len-1] == 'L') {
3462 --len;
3463 buf[len] = '\0';
3464 }
3465 sign = buf[0] == '-';
3466 numnondigits += sign;
3467 numdigits = len - numnondigits;
3468 assert(numdigits > 0);
3469
Tim Petersfff53252001-04-12 18:38:48 +00003470 /* Get rid of base marker unless F_ALT */
3471 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003472 /* Need to skip 0x, 0X or 0. */
3473 int skipped = 0;
3474 switch (type) {
3475 case 'o':
3476 assert(buf[sign] == '0');
3477 /* If 0 is only digit, leave it alone. */
3478 if (numdigits > 1) {
3479 skipped = 1;
3480 --numdigits;
3481 }
3482 break;
3483 case 'x':
3484 case 'X':
3485 assert(buf[sign] == '0');
3486 assert(buf[sign + 1] == 'x');
3487 skipped = 2;
3488 numnondigits -= 2;
3489 break;
3490 }
3491 if (skipped) {
3492 buf += skipped;
3493 len -= skipped;
3494 if (sign)
3495 buf[0] = '-';
3496 }
3497 assert(len == numnondigits + numdigits);
3498 assert(numdigits > 0);
3499 }
3500
3501 /* Fill with leading zeroes to meet minimum width. */
3502 if (prec > numdigits) {
3503 PyObject *r1 = PyString_FromStringAndSize(NULL,
3504 numnondigits + prec);
3505 char *b1;
3506 if (!r1) {
3507 Py_DECREF(result);
3508 return NULL;
3509 }
3510 b1 = PyString_AS_STRING(r1);
3511 for (i = 0; i < numnondigits; ++i)
3512 *b1++ = *buf++;
3513 for (i = 0; i < prec - numdigits; i++)
3514 *b1++ = '0';
3515 for (i = 0; i < numdigits; i++)
3516 *b1++ = *buf++;
3517 *b1 = '\0';
3518 Py_DECREF(result);
3519 result = r1;
3520 buf = PyString_AS_STRING(result);
3521 len = numnondigits + prec;
3522 }
3523
3524 /* Fix up case for hex conversions. */
3525 switch (type) {
3526 case 'x':
3527 /* Need to convert all upper case letters to lower case. */
3528 for (i = 0; i < len; i++)
3529 if (buf[i] >= 'A' && buf[i] <= 'F')
3530 buf[i] += 'a'-'A';
3531 break;
3532 case 'X':
3533 /* Need to convert 0x to 0X (and -0x to -0X). */
3534 if (buf[sign + 1] == 'x')
3535 buf[sign + 1] = 'X';
3536 break;
3537 }
3538 *pbuf = buf;
3539 *plen = len;
3540 return result;
3541}
3542
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003543static int
Fred Drakeba096332000-07-09 07:04:36 +00003544formatint(char *buf, size_t buflen, int flags,
3545 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003546{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003547 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003548 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3549 + 1 + 1 = 24 */
3550 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003551 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003552
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003553 x = PyInt_AsLong(v);
3554 if (x == -1 && PyErr_Occurred()) {
3555 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003556 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003557 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003558 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003559 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003560 "%u/%o/%x/%X of negative int will return "
3561 "a signed string in Python 2.4 and up") < 0)
3562 return -1;
3563 }
Guido van Rossume5372401993-03-16 12:15:04 +00003564 if (prec < 0)
3565 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003566
3567 if ((flags & F_ALT) &&
3568 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003569 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003570 * of issues that cause pain:
3571 * - when 0 is being converted, the C standard leaves off
3572 * the '0x' or '0X', which is inconsistent with other
3573 * %#x/%#X conversions and inconsistent with Python's
3574 * hex() function
3575 * - there are platforms that violate the standard and
3576 * convert 0 with the '0x' or '0X'
3577 * (Metrowerks, Compaq Tru64)
3578 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003579 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003580 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003581 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003582 * We can achieve the desired consistency by inserting our
3583 * own '0x' or '0X' prefix, and substituting %x/%X in place
3584 * of %#x/%#X.
3585 *
3586 * Note that this is the same approach as used in
3587 * formatint() in unicodeobject.c
3588 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003589 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003590 type, prec, type);
3591 }
3592 else {
3593 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003594 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003595 prec, type);
3596 }
3597
Tim Peters38fd5b62000-09-21 05:43:11 +00003598 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003599 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3600 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003601 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003602 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003603 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003604 return -1;
3605 }
Tim Peters885d4572001-11-28 20:27:42 +00003606 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003607 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003608}
3609
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003610static int
Fred Drakeba096332000-07-09 07:04:36 +00003611formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003612{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003613 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003614 if (PyString_Check(v)) {
3615 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003616 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003617 }
3618 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003619 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003620 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003621 }
3622 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003623 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003624}
3625
Guido van Rossum013142a1994-08-30 08:19:36 +00003626
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine performs its own bounds check so the buffer cannot overflow, but
   a better solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003636
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003637PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003638PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003639{
3640 char *fmt, *res;
3641 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003642 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003643 PyObject *result, *orig_args;
3644#ifdef Py_USING_UNICODE
3645 PyObject *v, *w;
3646#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003647 PyObject *dict = NULL;
3648 if (format == NULL || !PyString_Check(format) || args == NULL) {
3649 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003650 return NULL;
3651 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003652 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003653 fmt = PyString_AS_STRING(format);
3654 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003655 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003656 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003657 if (result == NULL)
3658 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003659 res = PyString_AsString(result);
3660 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003661 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003662 argidx = 0;
3663 }
3664 else {
3665 arglen = -1;
3666 argidx = -2;
3667 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003668 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3669 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003670 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003671 while (--fmtcnt >= 0) {
3672 if (*fmt != '%') {
3673 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003674 rescnt = fmtcnt + 100;
3675 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003676 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003677 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003678 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003679 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003680 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003681 }
3682 *res++ = *fmt++;
3683 }
3684 else {
3685 /* Got a format specifier */
3686 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003687 int width = -1;
3688 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003689 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003690 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003691 PyObject *v = NULL;
3692 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003693 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003694 int sign;
3695 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003696 char formatbuf[FORMATBUFLEN];
3697 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003698#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003699 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003700 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003701#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003702
Guido van Rossumda9c2711996-12-05 21:58:58 +00003703 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003704 if (*fmt == '(') {
3705 char *keystart;
3706 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003707 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003708 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003709
3710 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003711 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003712 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003713 goto error;
3714 }
3715 ++fmt;
3716 --fmtcnt;
3717 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003718 /* Skip over balanced parentheses */
3719 while (pcount > 0 && --fmtcnt >= 0) {
3720 if (*fmt == ')')
3721 --pcount;
3722 else if (*fmt == '(')
3723 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003724 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003725 }
3726 keylen = fmt - keystart - 1;
3727 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003728 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003729 "incomplete format key");
3730 goto error;
3731 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003732 key = PyString_FromStringAndSize(keystart,
3733 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003734 if (key == NULL)
3735 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003736 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003737 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003738 args_owned = 0;
3739 }
3740 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003741 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003742 if (args == NULL) {
3743 goto error;
3744 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003745 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003746 arglen = -1;
3747 argidx = -2;
3748 }
Guido van Rossume5372401993-03-16 12:15:04 +00003749 while (--fmtcnt >= 0) {
3750 switch (c = *fmt++) {
3751 case '-': flags |= F_LJUST; continue;
3752 case '+': flags |= F_SIGN; continue;
3753 case ' ': flags |= F_BLANK; continue;
3754 case '#': flags |= F_ALT; continue;
3755 case '0': flags |= F_ZERO; continue;
3756 }
3757 break;
3758 }
3759 if (c == '*') {
3760 v = getnextarg(args, arglen, &argidx);
3761 if (v == NULL)
3762 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003763 if (!PyInt_Check(v)) {
3764 PyErr_SetString(PyExc_TypeError,
3765 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003766 goto error;
3767 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003768 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003769 if (width < 0) {
3770 flags |= F_LJUST;
3771 width = -width;
3772 }
Guido van Rossume5372401993-03-16 12:15:04 +00003773 if (--fmtcnt >= 0)
3774 c = *fmt++;
3775 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003776 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003777 width = c - '0';
3778 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003779 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003780 if (!isdigit(c))
3781 break;
3782 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003783 PyErr_SetString(
3784 PyExc_ValueError,
3785 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003786 goto error;
3787 }
3788 width = width*10 + (c - '0');
3789 }
3790 }
3791 if (c == '.') {
3792 prec = 0;
3793 if (--fmtcnt >= 0)
3794 c = *fmt++;
3795 if (c == '*') {
3796 v = getnextarg(args, arglen, &argidx);
3797 if (v == NULL)
3798 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003799 if (!PyInt_Check(v)) {
3800 PyErr_SetString(
3801 PyExc_TypeError,
3802 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003803 goto error;
3804 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003805 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003806 if (prec < 0)
3807 prec = 0;
3808 if (--fmtcnt >= 0)
3809 c = *fmt++;
3810 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003811 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003812 prec = c - '0';
3813 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003814 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003815 if (!isdigit(c))
3816 break;
3817 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003818 PyErr_SetString(
3819 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003820 "prec too big");
3821 goto error;
3822 }
3823 prec = prec*10 + (c - '0');
3824 }
3825 }
3826 } /* prec */
3827 if (fmtcnt >= 0) {
3828 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003829 if (--fmtcnt >= 0)
3830 c = *fmt++;
3831 }
3832 }
3833 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003834 PyErr_SetString(PyExc_ValueError,
3835 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003836 goto error;
3837 }
3838 if (c != '%') {
3839 v = getnextarg(args, arglen, &argidx);
3840 if (v == NULL)
3841 goto error;
3842 }
3843 sign = 0;
3844 fill = ' ';
3845 switch (c) {
3846 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003847 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003848 len = 1;
3849 break;
3850 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003851#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003852 if (PyUnicode_Check(v)) {
3853 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003854 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003855 goto unicode;
3856 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003857#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003858 /* Fall through */
3859 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003860 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003861 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003862 else
3863 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003864 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003865 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003866 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00003867 /* XXX Note: this should never happen,
3868 since PyObject_Repr() and
3869 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003870 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00003871 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003872 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003873 goto error;
3874 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003875 pbuf = PyString_AS_STRING(temp);
3876 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003877 if (prec >= 0 && len > prec)
3878 len = prec;
3879 break;
3880 case 'i':
3881 case 'd':
3882 case 'u':
3883 case 'o':
3884 case 'x':
3885 case 'X':
3886 if (c == 'i')
3887 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003888 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003889 temp = _PyString_FormatLong(v, flags,
3890 prec, c, &pbuf, &len);
3891 if (!temp)
3892 goto error;
3893 /* unbounded ints can always produce
3894 a sign character! */
3895 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003896 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003897 else {
3898 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003899 len = formatint(pbuf,
3900 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003901 flags, prec, c, v);
3902 if (len < 0)
3903 goto error;
3904 /* only d conversion is signed */
3905 sign = c == 'd';
3906 }
3907 if (flags & F_ZERO)
3908 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003909 break;
3910 case 'e':
3911 case 'E':
3912 case 'f':
3913 case 'g':
3914 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003915 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003916 len = formatfloat(pbuf, sizeof(formatbuf),
3917 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003918 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003919 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003920 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003921 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003922 fill = '0';
3923 break;
3924 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003925 pbuf = formatbuf;
3926 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003927 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003928 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003929 break;
3930 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003931 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003932 "unsupported format character '%c' (0x%x) "
3933 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003934 c, c,
3935 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003936 goto error;
3937 }
3938 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003939 if (*pbuf == '-' || *pbuf == '+') {
3940 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003941 len--;
3942 }
3943 else if (flags & F_SIGN)
3944 sign = '+';
3945 else if (flags & F_BLANK)
3946 sign = ' ';
3947 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003948 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003949 }
3950 if (width < len)
3951 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003952 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003953 reslen -= rescnt;
3954 rescnt = width + fmtcnt + 100;
3955 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003956 if (reslen < 0) {
3957 Py_DECREF(result);
3958 return PyErr_NoMemory();
3959 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003960 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003961 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003962 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003963 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003964 }
3965 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003966 if (fill != ' ')
3967 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003968 rescnt--;
3969 if (width > len)
3970 width--;
3971 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003972 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3973 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003974 assert(pbuf[1] == c);
3975 if (fill != ' ') {
3976 *res++ = *pbuf++;
3977 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003978 }
Tim Petersfff53252001-04-12 18:38:48 +00003979 rescnt -= 2;
3980 width -= 2;
3981 if (width < 0)
3982 width = 0;
3983 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003984 }
3985 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003986 do {
3987 --rescnt;
3988 *res++ = fill;
3989 } while (--width > len);
3990 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003991 if (fill == ' ') {
3992 if (sign)
3993 *res++ = sign;
3994 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003995 (c == 'x' || c == 'X')) {
3996 assert(pbuf[0] == '0');
3997 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003998 *res++ = *pbuf++;
3999 *res++ = *pbuf++;
4000 }
4001 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004002 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004003 res += len;
4004 rescnt -= len;
4005 while (--width >= len) {
4006 --rescnt;
4007 *res++ = ' ';
4008 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004009 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004010 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004011 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004012 goto error;
4013 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004014 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004015 } /* '%' */
4016 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004017 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004018 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004019 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004020 goto error;
4021 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004022 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004023 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004025 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004026 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004027
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004028#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004029 unicode:
4030 if (args_owned) {
4031 Py_DECREF(args);
4032 args_owned = 0;
4033 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004034 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004035 if (PyTuple_Check(orig_args) && argidx > 0) {
4036 PyObject *v;
4037 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4038 v = PyTuple_New(n);
4039 if (v == NULL)
4040 goto error;
4041 while (--n >= 0) {
4042 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4043 Py_INCREF(w);
4044 PyTuple_SET_ITEM(v, n, w);
4045 }
4046 args = v;
4047 } else {
4048 Py_INCREF(orig_args);
4049 args = orig_args;
4050 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004051 args_owned = 1;
4052 /* Take what we have of the result and let the Unicode formatting
4053 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004054 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004055 if (_PyString_Resize(&result, rescnt))
4056 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004057 fmtcnt = PyString_GET_SIZE(format) - \
4058 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004059 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4060 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004061 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004062 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004063 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004064 if (v == NULL)
4065 goto error;
4066 /* Paste what we have (result) to what the Unicode formatting
4067 function returned (v) and return the result (or error) */
4068 w = PyUnicode_Concat(result, v);
4069 Py_DECREF(result);
4070 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004071 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004072 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004073#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004074
Guido van Rossume5372401993-03-16 12:15:04 +00004075 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004076 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004077 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004078 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004079 }
Guido van Rossume5372401993-03-16 12:15:04 +00004080 return NULL;
4081}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004082
Guido van Rossum2a61e741997-01-18 07:55:05 +00004083void
Fred Drakeba096332000-07-09 07:04:36 +00004084PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004085{
4086 register PyStringObject *s = (PyStringObject *)(*p);
4087 PyObject *t;
4088 if (s == NULL || !PyString_Check(s))
4089 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004090 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004091 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004092 if (interned == NULL) {
4093 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004094 if (interned == NULL) {
4095 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004096 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004097 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004098 }
4099 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4100 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004101 Py_DECREF(*p);
4102 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004103 return;
4104 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004105 /* Ensure that only true string objects appear in the intern dict */
4106 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004107 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4108 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004109 if (t == NULL) {
4110 PyErr_Clear();
4111 return;
Tim Peters111f6092001-09-12 07:54:51 +00004112 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004113 } else {
4114 t = (PyObject*) s;
4115 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004116 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004117
4118 if (PyDict_SetItem(interned, t, t) == 0) {
4119 /* The two references in interned are not counted by
4120 refcnt. The string deallocator will take care of this */
4121 ((PyObject *)t)->ob_refcnt-=2;
4122 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4123 Py_DECREF(*p);
4124 *p = t;
4125 return;
4126 }
4127 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004128 PyErr_Clear();
4129}
4130
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004131void
4132PyString_InternImmortal(PyObject **p)
4133{
4134 PyString_InternInPlace(p);
4135 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4136 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4137 Py_INCREF(*p);
4138 }
4139}
4140
Guido van Rossum2a61e741997-01-18 07:55:05 +00004141
4142PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004143PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004144{
4145 PyObject *s = PyString_FromString(cp);
4146 if (s == NULL)
4147 return NULL;
4148 PyString_InternInPlace(&s);
4149 return s;
4150}
4151
Guido van Rossum8cf04761997-08-02 02:57:45 +00004152void
Fred Drakeba096332000-07-09 07:04:36 +00004153PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004154{
4155 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004156 for (i = 0; i < UCHAR_MAX + 1; i++) {
4157 Py_XDECREF(characters[i]);
4158 characters[i] = NULL;
4159 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004160 Py_XDECREF(nullstring);
4161 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004162}
Barry Warsawa903ad982001-02-23 16:40:48 +00004163
Barry Warsawa903ad982001-02-23 16:40:48 +00004164void _Py_ReleaseInternedStrings(void)
4165{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004166 PyObject *keys;
4167 PyStringObject *s;
4168 int i, n;
4169
4170 if (interned == NULL || !PyDict_Check(interned))
4171 return;
4172 keys = PyDict_Keys(interned);
4173 if (keys == NULL || !PyList_Check(keys)) {
4174 PyErr_Clear();
4175 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004176 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004177
4178 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4179 detector, interned strings are not forcibly deallocated; rather, we
4180 give them their stolen references back, and then clear and DECREF
4181 the interned dict. */
4182
4183 fprintf(stderr, "releasing interned strings\n");
4184 n = PyList_GET_SIZE(keys);
4185 for (i = 0; i < n; i++) {
4186 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4187 switch (s->ob_sstate) {
4188 case SSTATE_NOT_INTERNED:
4189 /* XXX Shouldn't happen */
4190 break;
4191 case SSTATE_INTERNED_IMMORTAL:
4192 s->ob_refcnt += 1;
4193 break;
4194 case SSTATE_INTERNED_MORTAL:
4195 s->ob_refcnt += 2;
4196 break;
4197 default:
4198 Py_FatalError("Inconsistent interned string state.");
4199 }
4200 s->ob_sstate = SSTATE_NOT_INTERNED;
4201 }
4202 Py_DECREF(keys);
4203 PyDict_Clear(interned);
4204 Py_DECREF(interned);
4205 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004206}