blob: d3351df448a16fc6b179c512188daa4e8a68608c [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 For PyString_FromStringAndSize(), the parameter the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000544 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545#ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
556
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
562
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
572 }
573#else
574 *p++ = *s++;
575#endif
576 continue;
577 }
578 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
583 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
604 }
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
647#ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "Unicode escapes not legal "
654 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#endif
658 default:
659 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
671}
672
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000673static int
674string_getsize(register PyObject *op)
675{
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
681}
682
683static /*const*/ char *
684string_getbuffer(register PyObject *op)
685{
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
691}
692
Guido van Rossumd7047b31995-01-02 19:07:15 +0000693int
Fred Drakeba096332000-07-09 07:04:36 +0000694PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (!PyString_Check(op))
697 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699}
700
701/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709int
710PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
713{
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
717 }
718
719 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
725 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000726 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000727#endif
728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747/* Methods */
748
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000749static int
Fred Drakeba096332000-07-09 07:04:36 +0000750string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
752 int i;
753 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000754 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000755
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000756 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
766 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000768#ifdef __VMS
769 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
770#else
771 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
772#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775
Thomas Wouters7e474022000-07-16 12:04:32 +0000776 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000777 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000778 if (memchr(op->ob_sval, '\'', op->ob_size) &&
779 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 quote = '"';
781
782 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 for (i = 0; i < op->ob_size; i++) {
784 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000789 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000791 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fprintf(fp, "\\r");
793 else if (c < ' ' || c >= 0x7f)
794 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000795 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000798 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000802PyObject *
803PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000805 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000806 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
807 PyObject *v;
808 if (newsize > INT_MAX) {
809 PyErr_SetString(PyExc_OverflowError,
810 "string is too large to make repr");
811 }
812 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000814 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 }
816 else {
817 register int i;
818 register char c;
819 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 int quote;
821
Thomas Wouters7e474022000-07-16 12:04:32 +0000822 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000824 if (smartquotes &&
825 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000826 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000827 quote = '"';
828
Tim Peters9161c8b2001-12-03 01:55:38 +0000829 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
839 *p++ = '\\', *p++ = 't';
840 else if (c == '\n')
841 *p++ = '\\', *p++ = 'n';
842 else if (c == '\r')
843 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 else if (c < ' ' || c >= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
847 function call). */
848 sprintf(p, "\\x%02x", c & 0xff);
849 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000850 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000851 else
852 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000857 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000858 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000859 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000860 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864string_repr(PyObject *op)
865{
866 return PyString_Repr(op, 1);
867}
868
869static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000870string_str(PyObject *s)
871{
Tim Petersc9933152001-10-16 20:18:24 +0000872 assert(PyString_Check(s));
873 if (PyString_CheckExact(s)) {
874 Py_INCREF(s);
875 return s;
876 }
877 else {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject *t = (PyStringObject *) s;
880 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
881 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000882}
883
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884static int
Fred Drakeba096332000-07-09 07:04:36 +0000885string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 return a->ob_size;
888}
889
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000891string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892{
893 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 register PyStringObject *op;
895 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000896#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000897 if (PyUnicode_Check(bb))
898 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000899#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000900 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000901 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000902 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 return NULL;
904 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000907 if ((a->ob_size == 0 || b->ob_size == 0) &&
908 PyString_CheckExact(a) && PyString_CheckExact(b)) {
909 if (a->ob_size == 0) {
910 Py_INCREF(bb);
911 return bb;
912 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 Py_INCREF(a);
914 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
916 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000917 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000919 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000920 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000922 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000924 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000925 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
926 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
927 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929#undef b
930}
931
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000933string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
935 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000936 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000937 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000939 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 if (n < 0)
941 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000942 /* watch out for overflows: the size can overflow int,
943 * and the # of bytes needed can overflow size_t
944 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000946 if (n && size / n != a->ob_size) {
947 PyErr_SetString(PyExc_OverflowError,
948 "repeated string is too long");
949 return NULL;
950 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000951 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
Tim Peters8f422462000-09-09 06:13:41 +0000955 nbytes = size * sizeof(char);
956 if (nbytes / sizeof(char) != (size_t)size ||
957 nbytes + sizeof(PyStringObject) <= nbytes) {
958 PyErr_SetString(PyExc_OverflowError,
959 "repeated string is too long");
960 return NULL;
961 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000963 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000969 op->ob_sval[size] = '\0';
970 if (a->ob_size == 1 && n > 0) {
971 memset(op->ob_sval, a->ob_sval[0] , n);
972 return (PyObject *) op;
973 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000974 i = 0;
975 if (i < size) {
976 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
977 i = (int) a->ob_size;
978 }
979 while (i < size) {
980 j = (i <= size-i) ? i : size-i;
981 memcpy(op->ob_sval+i, op->ob_sval, j);
982 i += j;
983 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985}
986
987/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
988
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000990string_slice(register PyStringObject *a, register int i, register int j)
991 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992{
993 if (i < 0)
994 i = 0;
995 if (j < 0)
996 j = 0; /* Avoid signed/unsigned bug in next line */
997 if (j > a->ob_size)
998 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000999 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1000 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001001 Py_INCREF(a);
1002 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 }
1004 if (j < i)
1005 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007}
1008
Guido van Rossum9284a572000-03-07 15:53:43 +00001009static int
Fred Drakeba096332000-07-09 07:04:36 +00001010string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001011{
Barry Warsaw817918c2002-08-06 16:58:21 +00001012 const char *lhs, *rhs, *end;
1013 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014
1015 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (PyUnicode_Check(el))
1018 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001019#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001020 if (!PyString_Check(el)) {
1021 PyErr_SetString(PyExc_TypeError,
1022 "'in <string>' requires string as left operand");
1023 return -1;
1024 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001025 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 rhs = PyString_AS_STRING(el);
1028 lhs = PyString_AS_STRING(a);
1029
1030 /* optimize for a single character */
1031 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001032 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001033
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001034 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001035 while (lhs <= end) {
1036 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001037 return 1;
1038 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001039
Guido van Rossum9284a572000-03-07 15:53:43 +00001040 return 0;
1041}
1042
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001044string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001047 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050 return NULL;
1051 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001052 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001053 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001054 if (v == NULL)
1055 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001056 else {
1057#ifdef COUNT_ALLOCS
1058 one_strings++;
1059#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001060 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001061 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001062 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Martin v. Löwiscd353062001-05-24 16:56:35 +00001065static PyObject*
1066string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001068 int c;
1069 int len_a, len_b;
1070 int min_len;
1071 PyObject *result;
1072
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001073 /* Make sure both arguments are strings. */
1074 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001075 result = Py_NotImplemented;
1076 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001077 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001078 if (a == b) {
1079 switch (op) {
1080 case Py_EQ:case Py_LE:case Py_GE:
1081 result = Py_True;
1082 goto out;
1083 case Py_NE:case Py_LT:case Py_GT:
1084 result = Py_False;
1085 goto out;
1086 }
1087 }
1088 if (op == Py_EQ) {
1089 /* Supporting Py_NE here as well does not save
1090 much time, since Py_NE is rarely used. */
1091 if (a->ob_size == b->ob_size
1092 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001093 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094 a->ob_size) == 0)) {
1095 result = Py_True;
1096 } else {
1097 result = Py_False;
1098 }
1099 goto out;
1100 }
1101 len_a = a->ob_size; len_b = b->ob_size;
1102 min_len = (len_a < len_b) ? len_a : len_b;
1103 if (min_len > 0) {
1104 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1105 if (c==0)
1106 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1107 }else
1108 c = 0;
1109 if (c == 0)
1110 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1111 switch (op) {
1112 case Py_LT: c = c < 0; break;
1113 case Py_LE: c = c <= 0; break;
1114 case Py_EQ: assert(0); break; /* unreachable */
1115 case Py_NE: c = c != 0; break;
1116 case Py_GT: c = c > 0; break;
1117 case Py_GE: c = c >= 0; break;
1118 default:
1119 result = Py_NotImplemented;
1120 goto out;
1121 }
1122 result = c ? Py_True : Py_False;
1123 out:
1124 Py_INCREF(result);
1125 return result;
1126}
1127
1128int
1129_PyString_Eq(PyObject *o1, PyObject *o2)
1130{
1131 PyStringObject *a, *b;
1132 a = (PyStringObject*)o1;
1133 b = (PyStringObject*)o2;
1134 return a->ob_size == b->ob_size
1135 && *a->ob_sval == *b->ob_sval
1136 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001137}
1138
Guido van Rossum9bfef441993-03-29 10:43:31 +00001139static long
Fred Drakeba096332000-07-09 07:04:36 +00001140string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001141{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 register int len;
1143 register unsigned char *p;
1144 register long x;
1145
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001146 if (a->ob_shash != -1)
1147 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 len = a->ob_size;
1149 p = (unsigned char *) a->ob_sval;
1150 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001151 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001152 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001153 x ^= a->ob_size;
1154 if (x == -1)
1155 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001156 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 return x;
1158}
1159
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001160static PyObject*
1161string_subscript(PyStringObject* self, PyObject* item)
1162{
1163 if (PyInt_Check(item)) {
1164 long i = PyInt_AS_LONG(item);
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PyLong_Check(item)) {
1170 long i = PyLong_AsLong(item);
1171 if (i == -1 && PyErr_Occurred())
1172 return NULL;
1173 if (i < 0)
1174 i += PyString_GET_SIZE(self);
1175 return string_item(self,i);
1176 }
1177 else if (PySlice_Check(item)) {
1178 int start, stop, step, slicelength, cur, i;
1179 char* source_buf;
1180 char* result_buf;
1181 PyObject* result;
1182
1183 if (PySlice_GetIndicesEx((PySliceObject*)item,
1184 PyString_GET_SIZE(self),
1185 &start, &stop, &step, &slicelength) < 0) {
1186 return NULL;
1187 }
1188
1189 if (slicelength <= 0) {
1190 return PyString_FromStringAndSize("", 0);
1191 }
1192 else {
1193 source_buf = PyString_AsString((PyObject*)self);
1194 result_buf = PyMem_Malloc(slicelength);
1195
1196 for (cur = start, i = 0; i < slicelength;
1197 cur += step, i++) {
1198 result_buf[i] = source_buf[cur];
1199 }
1200
1201 result = PyString_FromStringAndSize(result_buf,
1202 slicelength);
1203 PyMem_Free(result_buf);
1204 return result;
1205 }
1206 }
1207 else {
1208 PyErr_SetString(PyExc_TypeError,
1209 "string indices must be integers");
1210 return NULL;
1211 }
1212}
1213
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001214static int
Fred Drakeba096332000-07-09 07:04:36 +00001215string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216{
1217 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001218 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001219 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220 return -1;
1221 }
1222 *ptr = (void *)self->ob_sval;
1223 return self->ob_size;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
Guido van Rossum045e6881997-09-08 18:30:11 +00001229 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001230 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001231 return -1;
1232}
1233
1234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236{
1237 if ( lenp )
1238 *lenp = self->ob_size;
1239 return 1;
1240}
1241
Guido van Rossum1db70701998-10-08 02:18:52 +00001242static int
Fred Drakeba096332000-07-09 07:04:36 +00001243string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001244{
1245 if ( index != 0 ) {
1246 PyErr_SetString(PyExc_SystemError,
1247 "accessing non-existent string segment");
1248 return -1;
1249 }
1250 *ptr = self->ob_sval;
1251 return self->ob_size;
1252}
1253
/* Sequence-protocol slot table for strings.  The two assignment slots
   are left 0: no item or slice assignment handler is provided. */
static PySequenceMethods string_as_sequence = {
	(inquiry)string_length, /*sq_length*/
	(binaryfunc)string_concat, /*sq_concat*/
	(intargfunc)string_repeat, /*sq_repeat*/
	(intargfunc)string_item, /*sq_item*/
	(intintargfunc)string_slice, /*sq_slice*/
	0,		/*sq_ass_item*/
	0,		/*sq_ass_slice*/
	(objobjproc)string_contains /*sq_contains*/
};
1264
/* Mapping-protocol slot table for strings: length and subscript handlers
   only; the third slot is left 0 (presumably the subscript-assignment
   slot -- confirm against the PyMappingMethods declaration). */
static PyMappingMethods string_as_mapping = {
	(inquiry)string_length,
	(binaryfunc)string_subscript,
	0,
};
1270
/* Buffer-protocol slot table for strings: read buffer, write buffer
   (which always fails for strings), segment count and character
   buffer accessors, in that order. */
static PyBufferProcs string_as_buffer = {
	(getreadbufferproc)string_buffer_getreadbuf,
	(getwritebufferproc)string_buffer_getwritebuf,
	(getsegcountproc)string_buffer_getsegcount,
	(getcharbufferproc)string_buffer_getcharbuf,
};
1277
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278
1279
/* Strip-kind selectors; they double as indices into stripformat[] below.
   NOTE(review): judging by the names, they pick which end(s) of the
   string get stripped -- the users are outside this chunk. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The method name alone: the table entry with its leading three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001288
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289
1290static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001291split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001294 PyObject* item;
1295 PyObject *list = PyList_New(0);
1296
1297 if (list == NULL)
1298 return NULL;
1299
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 for (i = j = 0; i < len; ) {
1301 while (i < len && isspace(Py_CHARMASK(s[i])))
1302 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 while (i < len && !isspace(Py_CHARMASK(s[i])))
1305 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 if (maxsplit-- <= 0)
1308 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1310 if (item == NULL)
1311 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312 err = PyList_Append(list, item);
1313 Py_DECREF(item);
1314 if (err < 0)
1315 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
1318 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319 }
1320 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321 if (j < len) {
1322 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1323 if (item == NULL)
1324 goto finally;
1325 err = PyList_Append(list, item);
1326 Py_DECREF(item);
1327 if (err < 0)
1328 goto finally;
1329 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return list;
1331 finally:
1332 Py_DECREF(list);
1333 return NULL;
1334}
1335
1336
/* Docstring text describing S.split(). */
PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string. If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified or is None, any\n\
whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344
1345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001346string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347{
1348 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 int maxsplit = -1;
1350 const char *s = PyString_AS_STRING(self), *sub;
1351 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 if (maxsplit < 0)
1356 maxsplit = INT_MAX;
1357 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (PyString_Check(subobj)) {
1360 sub = PyString_AS_STRING(subobj);
1361 n = PyString_GET_SIZE(subobj);
1362 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001363#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001364 else if (PyUnicode_Check(subobj))
1365 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001366#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1368 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369 if (n == 0) {
1370 PyErr_SetString(PyExc_ValueError, "empty separator");
1371 return NULL;
1372 }
1373
1374 list = PyList_New(0);
1375 if (list == NULL)
1376 return NULL;
1377
1378 i = j = 0;
1379 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001380 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001381 if (maxsplit-- <= 0)
1382 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1384 if (item == NULL)
1385 goto fail;
1386 err = PyList_Append(list, item);
1387 Py_DECREF(item);
1388 if (err < 0)
1389 goto fail;
1390 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 }
1392 else
1393 i++;
1394 }
1395 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1396 if (item == NULL)
1397 goto fail;
1398 err = PyList_Append(list, item);
1399 Py_DECREF(item);
1400 if (err < 0)
1401 goto fail;
1402
1403 return list;
1404
1405 fail:
1406 Py_DECREF(list);
1407 return NULL;
1408}
1409
1410
/* Docstring text describing S.join(). */
PyDoc_STRVAR(join__doc__,
"S.join(sequence) -> string\n\
\n\
Return a string which is the concatenation of the strings in the\n\
sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416
1417static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001418string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419{
1420 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001421 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 char *p;
1424 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001425 size_t sz = 0;
1426 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001427 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428
Tim Peters19fe14e2001-01-19 03:03:47 +00001429 seq = PySequence_Fast(orig, "");
1430 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001431 if (PyErr_ExceptionMatches(PyExc_TypeError))
1432 PyErr_Format(PyExc_TypeError,
1433 "sequence expected, %.80s found",
1434 orig->ob_type->tp_name);
1435 return NULL;
1436 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001437
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001438 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001439 if (seqlen == 0) {
1440 Py_DECREF(seq);
1441 return PyString_FromString("");
1442 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001445 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1446 PyErr_Format(PyExc_TypeError,
1447 "sequence item 0: expected string,"
1448 " %.80s found",
1449 item->ob_type->tp_name);
1450 Py_DECREF(seq);
1451 return NULL;
1452 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001453 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001454 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001457
Tim Peters19fe14e2001-01-19 03:03:47 +00001458 /* There are at least two things to join. Do a pre-pass to figure out
1459 * the total amount of space we'll need (sz), see whether any argument
1460 * is absurd, and defer to the Unicode join if appropriate.
1461 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001464 item = PySequence_Fast_GET_ITEM(seq, i);
1465 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001466#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001467 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001468 /* Defer to Unicode join.
1469 * CAUTION: There's no gurantee that the
1470 * original sequence can be iterated over
1471 * again, so we must pass seq here.
1472 */
1473 PyObject *result;
1474 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001475 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001476 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001478#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001479 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001480 "sequence item %i: expected string,"
1481 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001482 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001483 Py_DECREF(seq);
1484 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001485 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001486 sz += PyString_GET_SIZE(item);
1487 if (i != 0)
1488 sz += seplen;
1489 if (sz < old_sz || sz > INT_MAX) {
1490 PyErr_SetString(PyExc_OverflowError,
1491 "join() is too long for a Python string");
1492 Py_DECREF(seq);
1493 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001495 }
1496
1497 /* Allocate result space. */
1498 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1499 if (res == NULL) {
1500 Py_DECREF(seq);
1501 return NULL;
1502 }
1503
1504 /* Catenate everything. */
1505 p = PyString_AS_STRING(res);
1506 for (i = 0; i < seqlen; ++i) {
1507 size_t n;
1508 item = PySequence_Fast_GET_ITEM(seq, i);
1509 n = PyString_GET_SIZE(item);
1510 memcpy(p, PyString_AS_STRING(item), n);
1511 p += n;
1512 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001513 memcpy(p, sep, seplen);
1514 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001515 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001517
Jeremy Hylton49048292000-07-11 03:28:17 +00001518 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520}
1521
Tim Peters52e155e2001-06-16 05:42:57 +00001522PyObject *
1523_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001524{
Tim Petersa7259592001-06-16 05:11:17 +00001525 assert(sep != NULL && PyString_Check(sep));
1526 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001527 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001528}
1529
/* Normalize a (start, end) index pair in place against a length `len'.
   `*end' is clamped down to `len'; a negative `*end' or `*start' has
   `len' added once and is then floored at 0.  `*start' is not clamped
   against `len'. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int hi = *end;
	int lo;

	if (hi > len) {
		hi = len;
	}
	else if (hi < 0) {
		hi += len;
	}
	*end = (hi < 0) ? 0 : hi;

	/* Read start only after end has been stored, as the original
	   statement order did. */
	lo = *start;
	if (lo < 0) {
		lo += len;
	}
	*start = (lo < 0) ? 0 : lo;
}
1544
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545static long
Fred Drakeba096332000-07-09 07:04:36 +00001546string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001548 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549 int len = PyString_GET_SIZE(self);
1550 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001553 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001554 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 return -2;
1556 if (PyString_Check(subobj)) {
1557 sub = PyString_AS_STRING(subobj);
1558 n = PyString_GET_SIZE(subobj);
1559 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001560#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001562 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001563#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565 return -2;
1566
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001567 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 if (dir > 0) {
1570 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 last -= n;
1573 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001574 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 return (long)i;
1576 }
1577 else {
1578 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001579
Guido van Rossum4c08d552000-03-10 22:55:18 +00001580 if (n == 0 && i <= last)
1581 return (long)last;
1582 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001583 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 return (long)j;
1585 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001586
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 return -1;
1588}
1589
1590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592"S.find(sub [,start [,end]]) -> int\n\
1593\n\
1594Return the lowest index in S where substring sub is found,\n\
1595such that sub is contained within s[start,end]. Optional\n\
1596arguments start and end are interpreted as in slice notation.\n\
1597\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001598Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599
1600static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001601string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001603 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604 if (result == -2)
1605 return NULL;
1606 return PyInt_FromLong(result);
1607}
1608
1609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611"S.index(sub [,start [,end]]) -> int\n\
1612\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001613Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614
1615static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001616string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001618 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (result == -2)
1620 return NULL;
1621 if (result == -1) {
1622 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001623 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 return NULL;
1625 }
1626 return PyInt_FromLong(result);
1627}
1628
1629
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001630PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631"S.rfind(sub [,start [,end]]) -> int\n\
1632\n\
1633Return the highest index in S where substring sub is found,\n\
1634such that sub is contained within s[start,end]. Optional\n\
1635arguments start and end are interpreted as in slice notation.\n\
1636\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001637Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638
1639static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001640string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001642 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643 if (result == -2)
1644 return NULL;
1645 return PyInt_FromLong(result);
1646}
1647
1648
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001649PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650"S.rindex(sub [,start [,end]]) -> int\n\
1651\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001652Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653
1654static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001655string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001657 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 if (result == -2)
1659 return NULL;
1660 if (result == -1) {
1661 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001662 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663 return NULL;
1664 }
1665 return PyInt_FromLong(result);
1666}
1667
1668
1669static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001670do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1671{
1672 char *s = PyString_AS_STRING(self);
1673 int len = PyString_GET_SIZE(self);
1674 char *sep = PyString_AS_STRING(sepobj);
1675 int seplen = PyString_GET_SIZE(sepobj);
1676 int i, j;
1677
1678 i = 0;
1679 if (striptype != RIGHTSTRIP) {
1680 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1681 i++;
1682 }
1683 }
1684
1685 j = len;
1686 if (striptype != LEFTSTRIP) {
1687 do {
1688 j--;
1689 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1690 j++;
1691 }
1692
1693 if (i == 0 && j == len && PyString_CheckExact(self)) {
1694 Py_INCREF(self);
1695 return (PyObject*)self;
1696 }
1697 else
1698 return PyString_FromStringAndSize(s+i, j-i);
1699}
1700
1701
1702static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001703do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704{
1705 char *s = PyString_AS_STRING(self);
1706 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001708 i = 0;
1709 if (striptype != RIGHTSTRIP) {
1710 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1711 i++;
1712 }
1713 }
1714
1715 j = len;
1716 if (striptype != LEFTSTRIP) {
1717 do {
1718 j--;
1719 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1720 j++;
1721 }
1722
Tim Peters8fa5dd02001-09-12 02:18:30 +00001723 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 Py_INCREF(self);
1725 return (PyObject*)self;
1726 }
1727 else
1728 return PyString_FromStringAndSize(s+i, j-i);
1729}
1730
1731
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001732static PyObject *
1733do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1734{
1735 PyObject *sep = NULL;
1736
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001737 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001738 return NULL;
1739
1740 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001741 if (PyString_Check(sep))
1742 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001743#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001744 else if (PyUnicode_Check(sep)) {
1745 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1746 PyObject *res;
1747 if (uniself==NULL)
1748 return NULL;
1749 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1750 striptype, sep);
1751 Py_DECREF(uniself);
1752 return res;
1753 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001754#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001755 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001756 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001757#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001758 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001759#else
1760 "%s arg must be None or str",
1761#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001762 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001763 return NULL;
1764 }
1765 return do_xstrip(self, striptype, sep);
1766 }
1767
1768 return do_strip(self, striptype);
1769}
1770
1771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001772PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001773"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774\n\
1775Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001776whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001777If chars is given and not None, remove characters in chars instead.\n\
1778If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779
1780static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001781string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001783 if (PyTuple_GET_SIZE(args) == 0)
1784 return do_strip(self, BOTHSTRIP); /* Common case */
1785 else
1786 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001791"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001793Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001794If chars is given and not None, remove characters in chars instead.\n\
1795If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001798string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001800 if (PyTuple_GET_SIZE(args) == 0)
1801 return do_strip(self, LEFTSTRIP); /* Common case */
1802 else
1803 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804}
1805
1806
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001807PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001808"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001810Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001811If chars is given and not None, remove characters in chars instead.\n\
1812If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813
1814static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001815string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001817 if (PyTuple_GET_SIZE(args) == 0)
1818 return do_strip(self, RIGHTSTRIP); /* Common case */
1819 else
1820 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821}
1822
1823
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001824PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825"S.lower() -> string\n\
1826\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001827Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828
1829static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001830string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831{
1832 char *s = PyString_AS_STRING(self), *s_new;
1833 int i, n = PyString_GET_SIZE(self);
1834 PyObject *new;
1835
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836 new = PyString_FromStringAndSize(NULL, n);
1837 if (new == NULL)
1838 return NULL;
1839 s_new = PyString_AsString(new);
1840 for (i = 0; i < n; i++) {
1841 int c = Py_CHARMASK(*s++);
1842 if (isupper(c)) {
1843 *s_new = tolower(c);
1844 } else
1845 *s_new = c;
1846 s_new++;
1847 }
1848 return new;
1849}
1850
1851
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001852PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853"S.upper() -> string\n\
1854\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001855Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856
1857static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001858string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001859{
1860 char *s = PyString_AS_STRING(self), *s_new;
1861 int i, n = PyString_GET_SIZE(self);
1862 PyObject *new;
1863
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 new = PyString_FromStringAndSize(NULL, n);
1865 if (new == NULL)
1866 return NULL;
1867 s_new = PyString_AsString(new);
1868 for (i = 0; i < n; i++) {
1869 int c = Py_CHARMASK(*s++);
1870 if (islower(c)) {
1871 *s_new = toupper(c);
1872 } else
1873 *s_new = c;
1874 s_new++;
1875 }
1876 return new;
1877}
1878
1879
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001880PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881"S.title() -> string\n\
1882\n\
1883Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001884characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885
1886static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001887string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888{
1889 char *s = PyString_AS_STRING(self), *s_new;
1890 int i, n = PyString_GET_SIZE(self);
1891 int previous_is_cased = 0;
1892 PyObject *new;
1893
Guido van Rossum4c08d552000-03-10 22:55:18 +00001894 new = PyString_FromStringAndSize(NULL, n);
1895 if (new == NULL)
1896 return NULL;
1897 s_new = PyString_AsString(new);
1898 for (i = 0; i < n; i++) {
1899 int c = Py_CHARMASK(*s++);
1900 if (islower(c)) {
1901 if (!previous_is_cased)
1902 c = toupper(c);
1903 previous_is_cased = 1;
1904 } else if (isupper(c)) {
1905 if (previous_is_cased)
1906 c = tolower(c);
1907 previous_is_cased = 1;
1908 } else
1909 previous_is_cased = 0;
1910 *s_new++ = c;
1911 }
1912 return new;
1913}
1914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001915PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916"S.capitalize() -> string\n\
1917\n\
1918Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001919capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920
1921static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001922string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923{
1924 char *s = PyString_AS_STRING(self), *s_new;
1925 int i, n = PyString_GET_SIZE(self);
1926 PyObject *new;
1927
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928 new = PyString_FromStringAndSize(NULL, n);
1929 if (new == NULL)
1930 return NULL;
1931 s_new = PyString_AsString(new);
1932 if (0 < n) {
1933 int c = Py_CHARMASK(*s++);
1934 if (islower(c))
1935 *s_new = toupper(c);
1936 else
1937 *s_new = c;
1938 s_new++;
1939 }
1940 for (i = 1; i < n; i++) {
1941 int c = Py_CHARMASK(*s++);
1942 if (isupper(c))
1943 *s_new = tolower(c);
1944 else
1945 *s_new = c;
1946 s_new++;
1947 }
1948 return new;
1949}
1950
1951
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001952PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953"S.count(sub[, start[, end]]) -> int\n\
1954\n\
1955Return the number of occurrences of substring sub in string\n\
1956S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001957interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
1959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001960string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963 int len = PyString_GET_SIZE(self), n;
1964 int i = 0, last = INT_MAX;
1965 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967
Guido van Rossumc6821402000-05-08 14:08:05 +00001968 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1969 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001971
Guido van Rossum4c08d552000-03-10 22:55:18 +00001972 if (PyString_Check(subobj)) {
1973 sub = PyString_AS_STRING(subobj);
1974 n = PyString_GET_SIZE(subobj);
1975 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001976#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001977 else if (PyUnicode_Check(subobj)) {
1978 int count;
1979 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1980 if (count == -1)
1981 return NULL;
1982 else
1983 return PyInt_FromLong((long) count);
1984 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001985#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001986 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1987 return NULL;
1988
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001989 string_adjust_indices(&i, &last, len);
1990
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991 m = last + 1 - n;
1992 if (n == 0)
1993 return PyInt_FromLong((long) (m-i));
1994
1995 r = 0;
1996 while (i < m) {
1997 if (!memcmp(s+i, sub, n)) {
1998 r++;
1999 i += n;
2000 } else {
2001 i++;
2002 }
2003 }
2004 return PyInt_FromLong((long) r);
2005}
2006
2007
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002008PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009"S.swapcase() -> string\n\
2010\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002012converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013
2014static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002015string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016{
2017 char *s = PyString_AS_STRING(self), *s_new;
2018 int i, n = PyString_GET_SIZE(self);
2019 PyObject *new;
2020
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 new = PyString_FromStringAndSize(NULL, n);
2022 if (new == NULL)
2023 return NULL;
2024 s_new = PyString_AsString(new);
2025 for (i = 0; i < n; i++) {
2026 int c = Py_CHARMASK(*s++);
2027 if (islower(c)) {
2028 *s_new = toupper(c);
2029 }
2030 else if (isupper(c)) {
2031 *s_new = tolower(c);
2032 }
2033 else
2034 *s_new = c;
2035 s_new++;
2036 }
2037 return new;
2038}
2039
2040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002041PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042"S.translate(table [,deletechars]) -> string\n\
2043\n\
2044Return a copy of the string S, where all characters occurring\n\
2045in the optional argument deletechars are removed, and the\n\
2046remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002047translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048
2049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002050string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 register char *input, *output;
2053 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054 register int i, c, changed = 0;
2055 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057 int inlen, tablen, dellen = 0;
2058 PyObject *result;
2059 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002062 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065
2066 if (PyString_Check(tableobj)) {
2067 table1 = PyString_AS_STRING(tableobj);
2068 tablen = PyString_GET_SIZE(tableobj);
2069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002070#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002072 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002073 parameter; instead a mapping to None will cause characters
2074 to be deleted. */
2075 if (delobj != NULL) {
2076 PyErr_SetString(PyExc_TypeError,
2077 "deletions are implemented differently for unicode");
2078 return NULL;
2079 }
2080 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2081 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002082#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085
Martin v. Löwis00b61272002-12-12 20:03:19 +00002086 if (tablen != 256) {
2087 PyErr_SetString(PyExc_ValueError,
2088 "translation table must be 256 characters long");
2089 return NULL;
2090 }
2091
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 if (delobj != NULL) {
2093 if (PyString_Check(delobj)) {
2094 del_table = PyString_AS_STRING(delobj);
2095 dellen = PyString_GET_SIZE(delobj);
2096 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002097#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 else if (PyUnicode_Check(delobj)) {
2099 PyErr_SetString(PyExc_TypeError,
2100 "deletions are implemented differently for unicode");
2101 return NULL;
2102 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002103#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2105 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106 }
2107 else {
2108 del_table = NULL;
2109 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 }
2111
2112 table = table1;
2113 inlen = PyString_Size(input_obj);
2114 result = PyString_FromStringAndSize((char *)NULL, inlen);
2115 if (result == NULL)
2116 return NULL;
2117 output_start = output = PyString_AsString(result);
2118 input = PyString_AsString(input_obj);
2119
2120 if (dellen == 0) {
2121 /* If no deletions are required, use faster code */
2122 for (i = inlen; --i >= 0; ) {
2123 c = Py_CHARMASK(*input++);
2124 if (Py_CHARMASK((*output++ = table[c])) != c)
2125 changed = 1;
2126 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002127 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 return result;
2129 Py_DECREF(result);
2130 Py_INCREF(input_obj);
2131 return input_obj;
2132 }
2133
2134 for (i = 0; i < 256; i++)
2135 trans_table[i] = Py_CHARMASK(table[i]);
2136
2137 for (i = 0; i < dellen; i++)
2138 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2139
2140 for (i = inlen; --i >= 0; ) {
2141 c = Py_CHARMASK(*input++);
2142 if (trans_table[c] != -1)
2143 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2144 continue;
2145 changed = 1;
2146 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002147 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 Py_DECREF(result);
2149 Py_INCREF(input_obj);
2150 return input_obj;
2151 }
2152 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002153 if (inlen > 0)
2154 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155 return result;
2156}
2157
2158
2159/* What follows is used for implementing replace(). Perry Stoll. */
2160
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the index into MEM of the match, or -1 if
  there is none.  If PAT is longer than MEM the result is -1.
  Note that pat[0] is always inspected, so callers are expected to pass
  a non-empty pattern.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot start within the last pat_len-1 bytes */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
2186
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming directly after each match:
      mem=1111  and pat=11 gives 2,
      mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	while (len >= 0) {
		int at = mymemfind(mem, len, pat, pat_len);
		int consumed;

		if (at == -1)
			break;
		/* skip everything up to and including this match */
		consumed = at + pat_len;
		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
2210
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced, starting
   from the left; a negative COUNT means "all of them".  An empty PAT
   matches before every byte of STR and once after the last one.

   The original string is returned (no allocation) when STR is empty,
   when PAT and SUB are both empty, when PAT is longer than STR, or when
   nothing is to be replaced.  Otherwise a new buffer is allocated here
   with PyMem_MALLOC and returned; it is not NUL-terminated unless it is
   the 1-byte buffer produced for an empty result.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result whose
       length does not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	if (pat_len > 0)
		nfound = mymemcnt(str, len, pat, pat_len);
	else if (len == INT_MAX)
		return NULL;	/* len + 1 would overflow */
	else
		nfound = len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth per replacement.  Only a positive delta can overflow:
	   with delta <= 0 the matches are non-overlapping pieces of STR,
	   so nfound*pat_len <= len and the result stays within [0, len]. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;
	new_len = len + nfound * delta;

	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* empty pattern: emit SUB, then one input byte,
			   until the replacement budget is used up; the
			   rest of the input is copied verbatim */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2308
2309
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002310PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002311"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312\n\
2313Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002314old replaced by new. If the optional argument count is\n\
2315given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
2317static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002318string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 const char *str = PyString_AS_STRING(self), *sub, *repl;
2321 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002322 const int len = PyString_GET_SIZE(self);
2323 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 if (!PyArg_ParseTuple(args, "OO|i:replace",
2329 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331
2332 if (PyString_Check(subobj)) {
2333 sub = PyString_AS_STRING(subobj);
2334 sub_len = PyString_GET_SIZE(subobj);
2335 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002336#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002338 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002340#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2342 return NULL;
2343
2344 if (PyString_Check(replobj)) {
2345 repl = PyString_AS_STRING(replobj);
2346 repl_len = PyString_GET_SIZE(replobj);
2347 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002348#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002350 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002352#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2354 return NULL;
2355
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 if (new_s == NULL) {
2358 PyErr_NoMemory();
2359 return NULL;
2360 }
2361 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002362 if (PyString_CheckExact(self)) {
2363 /* we're returning another reference to self */
2364 new = (PyObject*)self;
2365 Py_INCREF(new);
2366 }
2367 else {
2368 new = PyString_FromStringAndSize(str, len);
2369 if (new == NULL)
2370 return NULL;
2371 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 }
2373 else {
2374 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002375 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376 }
2377 return new;
2378}
2379
2380
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002381PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002382"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002384Return True if S starts with the specified prefix, False otherwise.\n\
2385With optional start, test S beginning at that position.\n\
2386With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002387
2388static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002389string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394 int plen;
2395 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002396 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398
Guido van Rossumc6821402000-05-08 14:08:05 +00002399 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2400 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401 return NULL;
2402 if (PyString_Check(subobj)) {
2403 prefix = PyString_AS_STRING(subobj);
2404 plen = PyString_GET_SIZE(subobj);
2405 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002406#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002407 else if (PyUnicode_Check(subobj)) {
2408 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002409 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002410 subobj, start, end, -1);
2411 if (rc == -1)
2412 return NULL;
2413 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002414 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002415 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002416#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 return NULL;
2419
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002420 string_adjust_indices(&start, &end, len);
2421
2422 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002423 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002425 if (end-start >= plen)
2426 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2427 else
2428 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429}
2430
2431
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002432PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002433"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002435Return True if S ends with the specified suffix, False otherwise.\n\
2436With optional start, test S beginning at that position.\n\
2437With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438
2439static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002440string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 const char* suffix;
2445 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002447 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449
Guido van Rossumc6821402000-05-08 14:08:05 +00002450 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2451 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452 return NULL;
2453 if (PyString_Check(subobj)) {
2454 suffix = PyString_AS_STRING(subobj);
2455 slen = PyString_GET_SIZE(subobj);
2456 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002457#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002458 else if (PyUnicode_Check(subobj)) {
2459 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002460 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002461 subobj, start, end, +1);
2462 if (rc == -1)
2463 return NULL;
2464 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002465 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002467#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469 return NULL;
2470
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002471 string_adjust_indices(&start, &end, len);
2472
2473 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002474 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002476 if (end-slen > start)
2477 start = end - slen;
2478 if (end-start >= slen)
2479 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2480 else
2481 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482}
2483
2484
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002485PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002486"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002487\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002488Encodes S using the codec registered for encoding. encoding defaults\n\
2489to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002490handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002491a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2492'xmlcharrefreplace' as well as any other name registered with\n\
2493codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002494
2495static PyObject *
2496string_encode(PyStringObject *self, PyObject *args)
2497{
2498 char *encoding = NULL;
2499 char *errors = NULL;
2500 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2501 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002502 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2503}
2504
2505
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002506PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002507"S.decode([encoding[,errors]]) -> object\n\
2508\n\
2509Decodes S using the codec registered for encoding. encoding defaults\n\
2510to the default encoding. errors may be given to set a different error\n\
2511handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002512a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2513as well as any other name registerd with codecs.register_error that is\n\
2514able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002515
2516static PyObject *
2517string_decode(PyStringObject *self, PyObject *args)
2518{
2519 char *encoding = NULL;
2520 char *errors = NULL;
2521 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2522 return NULL;
2523 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002524}
2525
2526
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002527PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528"S.expandtabs([tabsize]) -> string\n\
2529\n\
2530Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002531If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532
2533static PyObject*
2534string_expandtabs(PyStringObject *self, PyObject *args)
2535{
2536 const char *e, *p;
2537 char *q;
2538 int i, j;
2539 PyObject *u;
2540 int tabsize = 8;
2541
2542 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2543 return NULL;
2544
Thomas Wouters7e474022000-07-16 12:04:32 +00002545 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546 i = j = 0;
2547 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2548 for (p = PyString_AS_STRING(self); p < e; p++)
2549 if (*p == '\t') {
2550 if (tabsize > 0)
2551 j += tabsize - (j % tabsize);
2552 }
2553 else {
2554 j++;
2555 if (*p == '\n' || *p == '\r') {
2556 i += j;
2557 j = 0;
2558 }
2559 }
2560
2561 /* Second pass: create output string and fill it */
2562 u = PyString_FromStringAndSize(NULL, i + j);
2563 if (!u)
2564 return NULL;
2565
2566 j = 0;
2567 q = PyString_AS_STRING(u);
2568
2569 for (p = PyString_AS_STRING(self); p < e; p++)
2570 if (*p == '\t') {
2571 if (tabsize > 0) {
2572 i = tabsize - (j % tabsize);
2573 j += i;
2574 while (i--)
2575 *q++ = ' ';
2576 }
2577 }
2578 else {
2579 j++;
2580 *q++ = *p;
2581 if (*p == '\n' || *p == '\r')
2582 j = 0;
2583 }
2584
2585 return u;
2586}
2587
Tim Peters8fa5dd02001-09-12 02:18:30 +00002588static PyObject *
2589pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590{
2591 PyObject *u;
2592
2593 if (left < 0)
2594 left = 0;
2595 if (right < 0)
2596 right = 0;
2597
Tim Peters8fa5dd02001-09-12 02:18:30 +00002598 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 Py_INCREF(self);
2600 return (PyObject *)self;
2601 }
2602
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002603 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 left + PyString_GET_SIZE(self) + right);
2605 if (u) {
2606 if (left)
2607 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002608 memcpy(PyString_AS_STRING(u) + left,
2609 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610 PyString_GET_SIZE(self));
2611 if (right)
2612 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2613 fill, right);
2614 }
2615
2616 return u;
2617}
2618
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002619PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002620"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002621"\n"
2622"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002623"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624
2625static PyObject *
2626string_ljust(PyStringObject *self, PyObject *args)
2627{
2628 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002629 char fillchar = ' ';
2630
2631 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002632 return NULL;
2633
Tim Peters8fa5dd02001-09-12 02:18:30 +00002634 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 Py_INCREF(self);
2636 return (PyObject*) self;
2637 }
2638
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002639 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002640}
2641
2642
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002643PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002644"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002645"\n"
2646"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002647"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648
2649static PyObject *
2650string_rjust(PyStringObject *self, PyObject *args)
2651{
2652 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002653 char fillchar = ' ';
2654
2655 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002656 return NULL;
2657
Tim Peters8fa5dd02001-09-12 02:18:30 +00002658 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002659 Py_INCREF(self);
2660 return (PyObject*) self;
2661 }
2662
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002663 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002664}
2665
2666
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002667PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002668"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002669"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002670"Return S centered in a string of length width. Padding is\n"
2671"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672
2673static PyObject *
2674string_center(PyStringObject *self, PyObject *args)
2675{
2676 int marg, left;
2677 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002678 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002679
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002680 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002681 return NULL;
2682
Tim Peters8fa5dd02001-09-12 02:18:30 +00002683 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002684 Py_INCREF(self);
2685 return (PyObject*) self;
2686 }
2687
2688 marg = width - PyString_GET_SIZE(self);
2689 left = marg / 2 + (marg & width & 1);
2690
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002691 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002692}
2693
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002694PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002695"S.zfill(width) -> string\n"
2696"\n"
2697"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002698"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002699
2700static PyObject *
2701string_zfill(PyStringObject *self, PyObject *args)
2702{
2703 int fill;
2704 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002705 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002706
2707 int width;
2708 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2709 return NULL;
2710
2711 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002712 if (PyString_CheckExact(self)) {
2713 Py_INCREF(self);
2714 return (PyObject*) self;
2715 }
2716 else
2717 return PyString_FromStringAndSize(
2718 PyString_AS_STRING(self),
2719 PyString_GET_SIZE(self)
2720 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002721 }
2722
2723 fill = width - PyString_GET_SIZE(self);
2724
2725 s = pad(self, fill, 0, '0');
2726
2727 if (s == NULL)
2728 return NULL;
2729
2730 p = PyString_AS_STRING(s);
2731 if (p[fill] == '+' || p[fill] == '-') {
2732 /* move sign to beginning of string */
2733 p[0] = p[fill];
2734 p[fill] = '0';
2735 }
2736
2737 return (PyObject*) s;
2738}
2739
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002740PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002741"S.isspace() -> bool\n\
2742\n\
2743Return True if all characters in S are whitespace\n\
2744and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002745
2746static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002747string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002748{
Fred Drakeba096332000-07-09 07:04:36 +00002749 register const unsigned char *p
2750 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002751 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002752
Guido van Rossum4c08d552000-03-10 22:55:18 +00002753 /* Shortcut for single character strings */
2754 if (PyString_GET_SIZE(self) == 1 &&
2755 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002756 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002757
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002758 /* Special case for empty strings */
2759 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002760 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002761
Guido van Rossum4c08d552000-03-10 22:55:18 +00002762 e = p + PyString_GET_SIZE(self);
2763 for (; p < e; p++) {
2764 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002765 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002766 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002767 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002768}
2769
2770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002771PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002772"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002773\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002774Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002775and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002776
2777static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002778string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002779{
Fred Drakeba096332000-07-09 07:04:36 +00002780 register const unsigned char *p
2781 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002782 register const unsigned char *e;
2783
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002784 /* Shortcut for single character strings */
2785 if (PyString_GET_SIZE(self) == 1 &&
2786 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002787 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002788
2789 /* Special case for empty strings */
2790 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002791 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002792
2793 e = p + PyString_GET_SIZE(self);
2794 for (; p < e; p++) {
2795 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002796 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002797 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002798 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002799}
2800
2801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002802PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002803"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002804\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002805Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002806and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002807
2808static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002809string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002810{
Fred Drakeba096332000-07-09 07:04:36 +00002811 register const unsigned char *p
2812 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002813 register const unsigned char *e;
2814
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002815 /* Shortcut for single character strings */
2816 if (PyString_GET_SIZE(self) == 1 &&
2817 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002818 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002819
2820 /* Special case for empty strings */
2821 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002822 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002823
2824 e = p + PyString_GET_SIZE(self);
2825 for (; p < e; p++) {
2826 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002827 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002828 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002829 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002830}
2831
2832
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002833PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002834"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002835\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002836Return True if all characters in S are digits\n\
2837and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002838
2839static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002840string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841{
Fred Drakeba096332000-07-09 07:04:36 +00002842 register const unsigned char *p
2843 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002844 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845
Guido van Rossum4c08d552000-03-10 22:55:18 +00002846 /* Shortcut for single character strings */
2847 if (PyString_GET_SIZE(self) == 1 &&
2848 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002849 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002850
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002851 /* Special case for empty strings */
2852 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002853 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002854
Guido van Rossum4c08d552000-03-10 22:55:18 +00002855 e = p + PyString_GET_SIZE(self);
2856 for (; p < e; p++) {
2857 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002858 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002859 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002860 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861}
2862
2863
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002864PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002865"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002866\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002867Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002868at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869
2870static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002871string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872{
Fred Drakeba096332000-07-09 07:04:36 +00002873 register const unsigned char *p
2874 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002875 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876 int cased;
2877
Guido van Rossum4c08d552000-03-10 22:55:18 +00002878 /* Shortcut for single character strings */
2879 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002880 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002882 /* Special case for empty strings */
2883 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002884 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002885
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886 e = p + PyString_GET_SIZE(self);
2887 cased = 0;
2888 for (; p < e; p++) {
2889 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002890 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002891 else if (!cased && islower(*p))
2892 cased = 1;
2893 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002894 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002895}
2896
2897
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002898PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002899"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002901Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002902at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002903
2904static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002905string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002906{
Fred Drakeba096332000-07-09 07:04:36 +00002907 register const unsigned char *p
2908 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002909 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002910 int cased;
2911
Guido van Rossum4c08d552000-03-10 22:55:18 +00002912 /* Shortcut for single character strings */
2913 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002914 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002915
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002916 /* Special case for empty strings */
2917 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002918 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002919
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920 e = p + PyString_GET_SIZE(self);
2921 cased = 0;
2922 for (; p < e; p++) {
2923 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002924 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002925 else if (!cased && isupper(*p))
2926 cased = 1;
2927 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002928 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929}
2930
2931
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002932PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002933"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002934\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002935Return True if S is a titlecased string and there is at least one\n\
2936character in S, i.e. uppercase characters may only follow uncased\n\
2937characters and lowercase characters only cased ones. Return False\n\
2938otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002939
2940static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002941string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942{
Fred Drakeba096332000-07-09 07:04:36 +00002943 register const unsigned char *p
2944 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002945 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002946 int cased, previous_is_cased;
2947
Guido van Rossum4c08d552000-03-10 22:55:18 +00002948 /* Shortcut for single character strings */
2949 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002950 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002951
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002952 /* Special case for empty strings */
2953 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002954 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002955
Guido van Rossum4c08d552000-03-10 22:55:18 +00002956 e = p + PyString_GET_SIZE(self);
2957 cased = 0;
2958 previous_is_cased = 0;
2959 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002960 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961
2962 if (isupper(ch)) {
2963 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002964 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002965 previous_is_cased = 1;
2966 cased = 1;
2967 }
2968 else if (islower(ch)) {
2969 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002970 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002971 previous_is_cased = 1;
2972 cased = 1;
2973 }
2974 else
2975 previous_is_cased = 0;
2976 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002977 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002978}
2979
2980
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002981PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002982"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002983\n\
2984Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002985Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002986is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987
2988#define SPLIT_APPEND(data, left, right) \
2989 str = PyString_FromStringAndSize(data + left, right - left); \
2990 if (!str) \
2991 goto onError; \
2992 if (PyList_Append(list, str)) { \
2993 Py_DECREF(str); \
2994 goto onError; \
2995 } \
2996 else \
2997 Py_DECREF(str);
2998
2999static PyObject*
3000string_splitlines(PyStringObject *self, PyObject *args)
3001{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003002 register int i;
3003 register int j;
3004 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003005 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003006 PyObject *list;
3007 PyObject *str;
3008 char *data;
3009
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003010 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003011 return NULL;
3012
3013 data = PyString_AS_STRING(self);
3014 len = PyString_GET_SIZE(self);
3015
Guido van Rossum4c08d552000-03-10 22:55:18 +00003016 list = PyList_New(0);
3017 if (!list)
3018 goto onError;
3019
3020 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003021 int eol;
3022
Guido van Rossum4c08d552000-03-10 22:55:18 +00003023 /* Find a line and append it */
3024 while (i < len && data[i] != '\n' && data[i] != '\r')
3025 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003026
3027 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003028 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003029 if (i < len) {
3030 if (data[i] == '\r' && i + 1 < len &&
3031 data[i+1] == '\n')
3032 i += 2;
3033 else
3034 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003035 if (keepends)
3036 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003037 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003038 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003039 j = i;
3040 }
3041 if (j < len) {
3042 SPLIT_APPEND(data, j, len);
3043 }
3044
3045 return list;
3046
3047 onError:
3048 Py_DECREF(list);
3049 return NULL;
3050}
3051
3052#undef SPLIT_APPEND
3053
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003054static PyObject *
3055string_getnewargs(PyStringObject *v)
3056{
3057 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3058}
3059
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003060
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003061static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003062string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063 /* Counterparts of the obsolete stropmodule functions; except
3064 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003065 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3066 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3067 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3068 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003069 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3070 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3071 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3072 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3073 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3074 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3075 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003076 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3077 capitalize__doc__},
3078 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3079 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3080 endswith__doc__},
3081 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3082 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3083 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3084 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3085 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3086 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3087 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3088 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3089 startswith__doc__},
3090 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3091 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3092 swapcase__doc__},
3093 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3094 translate__doc__},
3095 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3096 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3097 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3098 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3099 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3100 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3101 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3102 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3103 expandtabs__doc__},
3104 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3105 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003106 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003107 {NULL, NULL} /* sentinel */
3108};
3109
Jeremy Hylton938ace62002-07-17 16:30:39 +00003110static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003111str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3112
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003113static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003114string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003115{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003116 PyObject *x = NULL;
3117 static char *kwlist[] = {"object", 0};
3118
Guido van Rossumae960af2001-08-30 03:11:59 +00003119 if (type != &PyString_Type)
3120 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003121 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3122 return NULL;
3123 if (x == NULL)
3124 return PyString_FromString("");
3125 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003126}
3127
Guido van Rossumae960af2001-08-30 03:11:59 +00003128static PyObject *
3129str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3130{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003131 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003132 int n;
3133
3134 assert(PyType_IsSubtype(type, &PyString_Type));
3135 tmp = string_new(&PyString_Type, args, kwds);
3136 if (tmp == NULL)
3137 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003138 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003139 n = PyString_GET_SIZE(tmp);
3140 pnew = type->tp_alloc(type, n);
3141 if (pnew != NULL) {
3142 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003143 ((PyStringObject *)pnew)->ob_shash =
3144 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003145 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003146 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003147 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003148 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003149}
3150
Guido van Rossumcacfc072002-05-24 19:01:59 +00003151static PyObject *
3152basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3153{
3154 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003155 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003156 return NULL;
3157}
3158
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003159static PyObject *
3160string_mod(PyObject *v, PyObject *w)
3161{
3162 if (!PyString_Check(v)) {
3163 Py_INCREF(Py_NotImplemented);
3164 return Py_NotImplemented;
3165 }
3166 return PyString_Format(v, w);
3167}
3168
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003169PyDoc_STRVAR(basestring_doc,
3170"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003171
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003172static PyNumberMethods string_as_number = {
3173 0, /*nb_add*/
3174 0, /*nb_subtract*/
3175 0, /*nb_multiply*/
3176 0, /*nb_divide*/
3177 string_mod, /*nb_remainder*/
3178};
3179
3180
Guido van Rossumcacfc072002-05-24 19:01:59 +00003181PyTypeObject PyBaseString_Type = {
3182 PyObject_HEAD_INIT(&PyType_Type)
3183 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003184 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003185 0,
3186 0,
3187 0, /* tp_dealloc */
3188 0, /* tp_print */
3189 0, /* tp_getattr */
3190 0, /* tp_setattr */
3191 0, /* tp_compare */
3192 0, /* tp_repr */
3193 0, /* tp_as_number */
3194 0, /* tp_as_sequence */
3195 0, /* tp_as_mapping */
3196 0, /* tp_hash */
3197 0, /* tp_call */
3198 0, /* tp_str */
3199 0, /* tp_getattro */
3200 0, /* tp_setattro */
3201 0, /* tp_as_buffer */
3202 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3203 basestring_doc, /* tp_doc */
3204 0, /* tp_traverse */
3205 0, /* tp_clear */
3206 0, /* tp_richcompare */
3207 0, /* tp_weaklistoffset */
3208 0, /* tp_iter */
3209 0, /* tp_iternext */
3210 0, /* tp_methods */
3211 0, /* tp_members */
3212 0, /* tp_getset */
3213 &PyBaseObject_Type, /* tp_base */
3214 0, /* tp_dict */
3215 0, /* tp_descr_get */
3216 0, /* tp_descr_set */
3217 0, /* tp_dictoffset */
3218 0, /* tp_init */
3219 0, /* tp_alloc */
3220 basestring_new, /* tp_new */
3221 0, /* tp_free */
3222};
3223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003224PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003225"str(object) -> string\n\
3226\n\
3227Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003228If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003229
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003230PyTypeObject PyString_Type = {
3231 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003232 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003233 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003234 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003235 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003236 (destructor)string_dealloc, /* tp_dealloc */
3237 (printfunc)string_print, /* tp_print */
3238 0, /* tp_getattr */
3239 0, /* tp_setattr */
3240 0, /* tp_compare */
3241 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003242 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003243 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003244 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003245 (hashfunc)string_hash, /* tp_hash */
3246 0, /* tp_call */
3247 (reprfunc)string_str, /* tp_str */
3248 PyObject_GenericGetAttr, /* tp_getattro */
3249 0, /* tp_setattro */
3250 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003251 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3252 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003253 string_doc, /* tp_doc */
3254 0, /* tp_traverse */
3255 0, /* tp_clear */
3256 (richcmpfunc)string_richcompare, /* tp_richcompare */
3257 0, /* tp_weaklistoffset */
3258 0, /* tp_iter */
3259 0, /* tp_iternext */
3260 string_methods, /* tp_methods */
3261 0, /* tp_members */
3262 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003263 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003264 0, /* tp_dict */
3265 0, /* tp_descr_get */
3266 0, /* tp_descr_set */
3267 0, /* tp_dictoffset */
3268 0, /* tp_init */
3269 0, /* tp_alloc */
3270 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003271 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003272};
3273
3274void
Fred Drakeba096332000-07-09 07:04:36 +00003275PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003276{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003277 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003278 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003279 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003280 if (w == NULL || !PyString_Check(*pv)) {
3281 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003282 *pv = NULL;
3283 return;
3284 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003285 v = string_concat((PyStringObject *) *pv, w);
3286 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003287 *pv = v;
3288}
3289
Guido van Rossum013142a1994-08-30 08:19:36 +00003290void
Fred Drakeba096332000-07-09 07:04:36 +00003291PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003292{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003293 PyString_Concat(pv, w);
3294 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003295}
3296
3297
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003298/* The following function breaks the notion that strings are immutable:
3299 it changes the size of a string. We get away with this only if there
3300 is only one module referencing the object. You can also think of it
3301 as creating a new string object and destroying the old one, only
3302 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003303 already be known to some other part of the code...
3304 Note that if there's not enough memory to resize the string, the original
3305 string object at *pv is deallocated, *pv is set to NULL, an "out of
3306 memory" exception is set, and -1 is returned. Else (on success) 0 is
3307 returned, and the value in *pv may or may not be the same as on input.
3308 As always, an extra byte is allocated for a trailing \0 byte (newsize
3309 does *not* include that), and a trailing \0 byte is stored.
3310*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003311
3312int
Fred Drakeba096332000-07-09 07:04:36 +00003313_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003314{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003315 register PyObject *v;
3316 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003317 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003318 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003319 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003320 Py_DECREF(v);
3321 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003322 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003323 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003324 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003325 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003326 _Py_ForgetReference(v);
3327 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003328 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003329 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003330 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003331 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003332 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003333 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003334 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003335 _Py_NewReference(*pv);
3336 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003337 sv->ob_size = newsize;
3338 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003339 return 0;
3340}
Guido van Rossume5372401993-03-16 12:15:04 +00003341
3342/* Helpers for formatstring */
3343
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003344static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003345getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003346{
3347 int argidx = *p_argidx;
3348 if (argidx < arglen) {
3349 (*p_argidx)++;
3350 if (arglen < 0)
3351 return args;
3352 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003353 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003354 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003355 PyErr_SetString(PyExc_TypeError,
3356 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003357 return NULL;
3358}
3359
/* Format codes: one bit per flag character, combined in an int bitmask. */
#define F_LJUST (1<<0)	/* '-' */
#define F_SIGN	(1<<1)	/* '+' */
#define F_BLANK (1<<2)	/* ' ' */
#define F_ALT	(1<<3)	/* '#' */
#define F_ZERO	(1<<4)	/* '0' */
3372
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003373static int
Fred Drakeba096332000-07-09 07:04:36 +00003374formatfloat(char *buf, size_t buflen, int flags,
3375 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003376{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003377 /* fmt = '%#.' + `prec` + `type`
3378 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003379 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003380 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003381 x = PyFloat_AsDouble(v);
3382 if (x == -1.0 && PyErr_Occurred()) {
3383 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003384 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003385 }
Guido van Rossume5372401993-03-16 12:15:04 +00003386 if (prec < 0)
3387 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003388 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3389 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003390 /* Worst case length calc to ensure no buffer overrun:
3391
3392 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003393 fmt = %#.<prec>g
3394 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003395 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003396 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003397
3398 'f' formats:
3399 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3400 len = 1 + 50 + 1 + prec = 52 + prec
3401
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003402 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003403 always given), therefore increase the length by one.
3404
3405 */
3406 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3407 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003408 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003409 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003410 return -1;
3411 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003412 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3413 (flags&F_ALT) ? "#" : "",
3414 prec, type);
Tim Peters885d4572001-11-28 20:27:42 +00003415 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003416 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003417}
3418
Tim Peters38fd5b62000-09-21 05:43:11 +00003419/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3420 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3421 * Python's regular ints.
3422 * Return value: a new PyString*, or NULL if error.
3423 * . *pbuf is set to point into it,
3424 * *plen set to the # of chars following that.
3425 * Caller must decref it when done using pbuf.
3426 * The string starting at *pbuf is of the form
3427 * "-"? ("0x" | "0X")? digit+
3428 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003429 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003430 * There will be at least prec digits, zero-filled on the left if
3431 * necessary to get that many.
3432 * val object to be converted
3433 * flags bitmask of format flags; only F_ALT is looked at
3434 * prec minimum number of digits; 0-fill on left if needed
3435 * type a character in [duoxX]; u acts the same as d
3436 *
3437 * CAUTION: o, x and X conversions on regular ints can never
3438 * produce a '-' sign, but can for Python's unbounded ints.
3439 */
3440PyObject*
3441_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3442 char **pbuf, int *plen)
3443{
3444 PyObject *result = NULL;
3445 char *buf;
3446 int i;
3447 int sign; /* 1 if '-', else 0 */
3448 int len; /* number of characters */
3449 int numdigits; /* len == numnondigits + numdigits */
3450 int numnondigits = 0;
3451
3452 switch (type) {
3453 case 'd':
3454 case 'u':
3455 result = val->ob_type->tp_str(val);
3456 break;
3457 case 'o':
3458 result = val->ob_type->tp_as_number->nb_oct(val);
3459 break;
3460 case 'x':
3461 case 'X':
3462 numnondigits = 2;
3463 result = val->ob_type->tp_as_number->nb_hex(val);
3464 break;
3465 default:
3466 assert(!"'type' not in [duoxX]");
3467 }
3468 if (!result)
3469 return NULL;
3470
3471 /* To modify the string in-place, there can only be one reference. */
3472 if (result->ob_refcnt != 1) {
3473 PyErr_BadInternalCall();
3474 return NULL;
3475 }
3476 buf = PyString_AsString(result);
3477 len = PyString_Size(result);
3478 if (buf[len-1] == 'L') {
3479 --len;
3480 buf[len] = '\0';
3481 }
3482 sign = buf[0] == '-';
3483 numnondigits += sign;
3484 numdigits = len - numnondigits;
3485 assert(numdigits > 0);
3486
Tim Petersfff53252001-04-12 18:38:48 +00003487 /* Get rid of base marker unless F_ALT */
3488 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003489 /* Need to skip 0x, 0X or 0. */
3490 int skipped = 0;
3491 switch (type) {
3492 case 'o':
3493 assert(buf[sign] == '0');
3494 /* If 0 is only digit, leave it alone. */
3495 if (numdigits > 1) {
3496 skipped = 1;
3497 --numdigits;
3498 }
3499 break;
3500 case 'x':
3501 case 'X':
3502 assert(buf[sign] == '0');
3503 assert(buf[sign + 1] == 'x');
3504 skipped = 2;
3505 numnondigits -= 2;
3506 break;
3507 }
3508 if (skipped) {
3509 buf += skipped;
3510 len -= skipped;
3511 if (sign)
3512 buf[0] = '-';
3513 }
3514 assert(len == numnondigits + numdigits);
3515 assert(numdigits > 0);
3516 }
3517
3518 /* Fill with leading zeroes to meet minimum width. */
3519 if (prec > numdigits) {
3520 PyObject *r1 = PyString_FromStringAndSize(NULL,
3521 numnondigits + prec);
3522 char *b1;
3523 if (!r1) {
3524 Py_DECREF(result);
3525 return NULL;
3526 }
3527 b1 = PyString_AS_STRING(r1);
3528 for (i = 0; i < numnondigits; ++i)
3529 *b1++ = *buf++;
3530 for (i = 0; i < prec - numdigits; i++)
3531 *b1++ = '0';
3532 for (i = 0; i < numdigits; i++)
3533 *b1++ = *buf++;
3534 *b1 = '\0';
3535 Py_DECREF(result);
3536 result = r1;
3537 buf = PyString_AS_STRING(result);
3538 len = numnondigits + prec;
3539 }
3540
3541 /* Fix up case for hex conversions. */
3542 switch (type) {
3543 case 'x':
3544 /* Need to convert all upper case letters to lower case. */
3545 for (i = 0; i < len; i++)
3546 if (buf[i] >= 'A' && buf[i] <= 'F')
3547 buf[i] += 'a'-'A';
3548 break;
3549 case 'X':
3550 /* Need to convert 0x to 0X (and -0x to -0X). */
3551 if (buf[sign + 1] == 'x')
3552 buf[sign + 1] = 'X';
3553 break;
3554 }
3555 *pbuf = buf;
3556 *plen = len;
3557 return result;
3558}
3559
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003560static int
Fred Drakeba096332000-07-09 07:04:36 +00003561formatint(char *buf, size_t buflen, int flags,
3562 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003563{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003564 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003565 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3566 + 1 + 1 = 24 */
3567 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003568 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003569 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003570
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003571 x = PyInt_AsLong(v);
3572 if (x == -1 && PyErr_Occurred()) {
3573 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003574 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003575 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003576 if (x < 0 && type == 'u') {
3577 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003578 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003579 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3580 sign = "-";
3581 else
3582 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003583 if (prec < 0)
3584 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003585
3586 if ((flags & F_ALT) &&
3587 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003588 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003589 * of issues that cause pain:
3590 * - when 0 is being converted, the C standard leaves off
3591 * the '0x' or '0X', which is inconsistent with other
3592 * %#x/%#X conversions and inconsistent with Python's
3593 * hex() function
3594 * - there are platforms that violate the standard and
3595 * convert 0 with the '0x' or '0X'
3596 * (Metrowerks, Compaq Tru64)
3597 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003598 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003599 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003600 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003601 * We can achieve the desired consistency by inserting our
3602 * own '0x' or '0X' prefix, and substituting %x/%X in place
3603 * of %#x/%#X.
3604 *
3605 * Note that this is the same approach as used in
3606 * formatint() in unicodeobject.c
3607 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003608 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3609 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003610 }
3611 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003612 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3613 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003614 prec, type);
3615 }
3616
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003617 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3618 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003619 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003620 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003621 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003622 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003623 return -1;
3624 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003625 if (sign[0])
3626 PyOS_snprintf(buf, buflen, fmt, -x);
3627 else
3628 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003629 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003630}
3631
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003632static int
Fred Drakeba096332000-07-09 07:04:36 +00003633formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003634{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003635 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003636 if (PyString_Check(v)) {
3637 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003638 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003639 }
3640 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003641 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003642 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003643 }
3644 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003645 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003646}
3647
Guido van Rossum013142a1994-08-30 08:19:36 +00003648
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003658
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003659PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003660PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003661{
3662 char *fmt, *res;
3663 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003664 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003665 PyObject *result, *orig_args;
3666#ifdef Py_USING_UNICODE
3667 PyObject *v, *w;
3668#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003669 PyObject *dict = NULL;
3670 if (format == NULL || !PyString_Check(format) || args == NULL) {
3671 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003672 return NULL;
3673 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003674 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003675 fmt = PyString_AS_STRING(format);
3676 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003677 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003678 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003679 if (result == NULL)
3680 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003681 res = PyString_AsString(result);
3682 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003683 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003684 argidx = 0;
3685 }
3686 else {
3687 arglen = -1;
3688 argidx = -2;
3689 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003690 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3691 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003692 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003693 while (--fmtcnt >= 0) {
3694 if (*fmt != '%') {
3695 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003696 rescnt = fmtcnt + 100;
3697 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003698 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003699 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003700 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003701 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003702 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003703 }
3704 *res++ = *fmt++;
3705 }
3706 else {
3707 /* Got a format specifier */
3708 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003709 int width = -1;
3710 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003711 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003712 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003713 PyObject *v = NULL;
3714 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003715 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003716 int sign;
3717 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003718 char formatbuf[FORMATBUFLEN];
3719 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003720#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003721 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003722 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003723#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003724
Guido van Rossumda9c2711996-12-05 21:58:58 +00003725 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003726 if (*fmt == '(') {
3727 char *keystart;
3728 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003729 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003730 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003731
3732 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003733 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003734 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003735 goto error;
3736 }
3737 ++fmt;
3738 --fmtcnt;
3739 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003740 /* Skip over balanced parentheses */
3741 while (pcount > 0 && --fmtcnt >= 0) {
3742 if (*fmt == ')')
3743 --pcount;
3744 else if (*fmt == '(')
3745 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003746 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003747 }
3748 keylen = fmt - keystart - 1;
3749 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003750 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003751 "incomplete format key");
3752 goto error;
3753 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003754 key = PyString_FromStringAndSize(keystart,
3755 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003756 if (key == NULL)
3757 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003758 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003759 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003760 args_owned = 0;
3761 }
3762 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003763 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003764 if (args == NULL) {
3765 goto error;
3766 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003767 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003768 arglen = -1;
3769 argidx = -2;
3770 }
Guido van Rossume5372401993-03-16 12:15:04 +00003771 while (--fmtcnt >= 0) {
3772 switch (c = *fmt++) {
3773 case '-': flags |= F_LJUST; continue;
3774 case '+': flags |= F_SIGN; continue;
3775 case ' ': flags |= F_BLANK; continue;
3776 case '#': flags |= F_ALT; continue;
3777 case '0': flags |= F_ZERO; continue;
3778 }
3779 break;
3780 }
3781 if (c == '*') {
3782 v = getnextarg(args, arglen, &argidx);
3783 if (v == NULL)
3784 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003785 if (!PyInt_Check(v)) {
3786 PyErr_SetString(PyExc_TypeError,
3787 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003788 goto error;
3789 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003790 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003791 if (width < 0) {
3792 flags |= F_LJUST;
3793 width = -width;
3794 }
Guido van Rossume5372401993-03-16 12:15:04 +00003795 if (--fmtcnt >= 0)
3796 c = *fmt++;
3797 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003798 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003799 width = c - '0';
3800 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003801 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003802 if (!isdigit(c))
3803 break;
3804 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003805 PyErr_SetString(
3806 PyExc_ValueError,
3807 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003808 goto error;
3809 }
3810 width = width*10 + (c - '0');
3811 }
3812 }
3813 if (c == '.') {
3814 prec = 0;
3815 if (--fmtcnt >= 0)
3816 c = *fmt++;
3817 if (c == '*') {
3818 v = getnextarg(args, arglen, &argidx);
3819 if (v == NULL)
3820 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003821 if (!PyInt_Check(v)) {
3822 PyErr_SetString(
3823 PyExc_TypeError,
3824 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003825 goto error;
3826 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003827 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003828 if (prec < 0)
3829 prec = 0;
3830 if (--fmtcnt >= 0)
3831 c = *fmt++;
3832 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003833 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003834 prec = c - '0';
3835 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003836 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003837 if (!isdigit(c))
3838 break;
3839 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003840 PyErr_SetString(
3841 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003842 "prec too big");
3843 goto error;
3844 }
3845 prec = prec*10 + (c - '0');
3846 }
3847 }
3848 } /* prec */
3849 if (fmtcnt >= 0) {
3850 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003851 if (--fmtcnt >= 0)
3852 c = *fmt++;
3853 }
3854 }
3855 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003856 PyErr_SetString(PyExc_ValueError,
3857 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003858 goto error;
3859 }
3860 if (c != '%') {
3861 v = getnextarg(args, arglen, &argidx);
3862 if (v == NULL)
3863 goto error;
3864 }
3865 sign = 0;
3866 fill = ' ';
3867 switch (c) {
3868 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003869 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003870 len = 1;
3871 break;
3872 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003873#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003874 if (PyUnicode_Check(v)) {
3875 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003876 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003877 goto unicode;
3878 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003879#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003880 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00003881 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003882 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003883 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003884 else
3885 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003886 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003887 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003888 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00003889 /* XXX Note: this should never happen,
3890 since PyObject_Repr() and
3891 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003892 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00003893 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003894 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003895 goto error;
3896 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003897 pbuf = PyString_AS_STRING(temp);
3898 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003899 if (prec >= 0 && len > prec)
3900 len = prec;
3901 break;
3902 case 'i':
3903 case 'd':
3904 case 'u':
3905 case 'o':
3906 case 'x':
3907 case 'X':
3908 if (c == 'i')
3909 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003910 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003911 temp = _PyString_FormatLong(v, flags,
3912 prec, c, &pbuf, &len);
3913 if (!temp)
3914 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00003915 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003916 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003917 else {
3918 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003919 len = formatint(pbuf,
3920 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003921 flags, prec, c, v);
3922 if (len < 0)
3923 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003924 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003925 }
3926 if (flags & F_ZERO)
3927 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003928 break;
3929 case 'e':
3930 case 'E':
3931 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00003932 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00003933 case 'g':
3934 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00003935 if (c == 'F')
3936 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003937 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003938 len = formatfloat(pbuf, sizeof(formatbuf),
3939 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003940 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003941 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003942 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003943 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003944 fill = '0';
3945 break;
3946 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00003947#ifdef Py_USING_UNICODE
3948 if (PyUnicode_Check(v)) {
3949 fmt = fmt_start;
3950 argidx = argidx_start;
3951 goto unicode;
3952 }
3953#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003954 pbuf = formatbuf;
3955 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003956 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003957 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003958 break;
3959 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003960 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003961 "unsupported format character '%c' (0x%x) "
3962 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003963 c, c,
3964 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003965 goto error;
3966 }
3967 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003968 if (*pbuf == '-' || *pbuf == '+') {
3969 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003970 len--;
3971 }
3972 else if (flags & F_SIGN)
3973 sign = '+';
3974 else if (flags & F_BLANK)
3975 sign = ' ';
3976 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003977 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003978 }
3979 if (width < len)
3980 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003981 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003982 reslen -= rescnt;
3983 rescnt = width + fmtcnt + 100;
3984 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003985 if (reslen < 0) {
3986 Py_DECREF(result);
3987 return PyErr_NoMemory();
3988 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003989 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003990 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003991 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003992 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003993 }
3994 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003995 if (fill != ' ')
3996 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003997 rescnt--;
3998 if (width > len)
3999 width--;
4000 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004001 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4002 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004003 assert(pbuf[1] == c);
4004 if (fill != ' ') {
4005 *res++ = *pbuf++;
4006 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004007 }
Tim Petersfff53252001-04-12 18:38:48 +00004008 rescnt -= 2;
4009 width -= 2;
4010 if (width < 0)
4011 width = 0;
4012 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004013 }
4014 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004015 do {
4016 --rescnt;
4017 *res++ = fill;
4018 } while (--width > len);
4019 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004020 if (fill == ' ') {
4021 if (sign)
4022 *res++ = sign;
4023 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004024 (c == 'x' || c == 'X')) {
4025 assert(pbuf[0] == '0');
4026 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004027 *res++ = *pbuf++;
4028 *res++ = *pbuf++;
4029 }
4030 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004031 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004032 res += len;
4033 rescnt -= len;
4034 while (--width >= len) {
4035 --rescnt;
4036 *res++ = ' ';
4037 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004038 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004039 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004040 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004041 goto error;
4042 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004043 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004044 } /* '%' */
4045 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004046 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004047 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004048 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004049 goto error;
4050 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004051 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004052 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004053 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004054 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004055 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004056
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004057#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004058 unicode:
4059 if (args_owned) {
4060 Py_DECREF(args);
4061 args_owned = 0;
4062 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004063 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004064 if (PyTuple_Check(orig_args) && argidx > 0) {
4065 PyObject *v;
4066 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4067 v = PyTuple_New(n);
4068 if (v == NULL)
4069 goto error;
4070 while (--n >= 0) {
4071 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4072 Py_INCREF(w);
4073 PyTuple_SET_ITEM(v, n, w);
4074 }
4075 args = v;
4076 } else {
4077 Py_INCREF(orig_args);
4078 args = orig_args;
4079 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004080 args_owned = 1;
4081 /* Take what we have of the result and let the Unicode formatting
4082 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004083 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004084 if (_PyString_Resize(&result, rescnt))
4085 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004086 fmtcnt = PyString_GET_SIZE(format) - \
4087 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004088 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4089 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004090 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004091 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004092 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004093 if (v == NULL)
4094 goto error;
4095 /* Paste what we have (result) to what the Unicode formatting
4096 function returned (v) and return the result (or error) */
4097 w = PyUnicode_Concat(result, v);
4098 Py_DECREF(result);
4099 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004100 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004101 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004102#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004103
Guido van Rossume5372401993-03-16 12:15:04 +00004104 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004105 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004106 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004107 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004108 }
Guido van Rossume5372401993-03-16 12:15:04 +00004109 return NULL;
4110}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004111
Guido van Rossum2a61e741997-01-18 07:55:05 +00004112void
Fred Drakeba096332000-07-09 07:04:36 +00004113PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004114{
4115 register PyStringObject *s = (PyStringObject *)(*p);
4116 PyObject *t;
4117 if (s == NULL || !PyString_Check(s))
4118 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004119 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004120 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004121 if (interned == NULL) {
4122 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004123 if (interned == NULL) {
4124 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004125 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004126 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004127 }
4128 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4129 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004130 Py_DECREF(*p);
4131 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004132 return;
4133 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004134 /* Ensure that only true string objects appear in the intern dict */
4135 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004136 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4137 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004138 if (t == NULL) {
4139 PyErr_Clear();
4140 return;
Tim Peters111f6092001-09-12 07:54:51 +00004141 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004142 } else {
4143 t = (PyObject*) s;
4144 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004145 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004146
4147 if (PyDict_SetItem(interned, t, t) == 0) {
4148 /* The two references in interned are not counted by
4149 refcnt. The string deallocator will take care of this */
4150 ((PyObject *)t)->ob_refcnt-=2;
4151 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4152 Py_DECREF(*p);
4153 *p = t;
4154 return;
4155 }
4156 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004157 PyErr_Clear();
4158}
4159
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004160void
4161PyString_InternImmortal(PyObject **p)
4162{
4163 PyString_InternInPlace(p);
4164 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4165 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4166 Py_INCREF(*p);
4167 }
4168}
4169
Guido van Rossum2a61e741997-01-18 07:55:05 +00004170
4171PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004172PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004173{
4174 PyObject *s = PyString_FromString(cp);
4175 if (s == NULL)
4176 return NULL;
4177 PyString_InternInPlace(&s);
4178 return s;
4179}
4180
Guido van Rossum8cf04761997-08-02 02:57:45 +00004181void
Fred Drakeba096332000-07-09 07:04:36 +00004182PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004183{
4184 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004185 for (i = 0; i < UCHAR_MAX + 1; i++) {
4186 Py_XDECREF(characters[i]);
4187 characters[i] = NULL;
4188 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004189 Py_XDECREF(nullstring);
4190 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004191}
Barry Warsawa903ad982001-02-23 16:40:48 +00004192
Barry Warsawa903ad982001-02-23 16:40:48 +00004193void _Py_ReleaseInternedStrings(void)
4194{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004195 PyObject *keys;
4196 PyStringObject *s;
4197 int i, n;
4198
4199 if (interned == NULL || !PyDict_Check(interned))
4200 return;
4201 keys = PyDict_Keys(interned);
4202 if (keys == NULL || !PyList_Check(keys)) {
4203 PyErr_Clear();
4204 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004205 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004206
4207 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4208 detector, interned strings are not forcibly deallocated; rather, we
4209 give them their stolen references back, and then clear and DECREF
4210 the interned dict. */
4211
4212 fprintf(stderr, "releasing interned strings\n");
4213 n = PyList_GET_SIZE(keys);
4214 for (i = 0; i < n; i++) {
4215 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4216 switch (s->ob_sstate) {
4217 case SSTATE_NOT_INTERNED:
4218 /* XXX Shouldn't happen */
4219 break;
4220 case SSTATE_INTERNED_IMMORTAL:
4221 s->ob_refcnt += 1;
4222 break;
4223 case SSTATE_INTERNED_MORTAL:
4224 s->ob_refcnt += 2;
4225 break;
4226 default:
4227 Py_FatalError("Inconsistent interned string state.");
4228 }
4229 s->ob_sstate = SSTATE_NOT_INTERNED;
4230 }
4231 Py_DECREF(keys);
4232 PyDict_Clear(interned);
4233 Py_DECREF(interned);
4234 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004235}