blob: dd38ee3c859605531ade7ff7136daf428c7bdc38 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 For PyString_FromStringAndSize(), the parameter the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
544#ifdef Py_USING_UNICODE
545 if (recode_encoding && (*s & 0x80)) {
546 PyObject *u, *w;
547 char *r;
548 const char* t;
549 int rn;
550 t = s;
551 /* Decode non-ASCII bytes as UTF-8. */
552 while (t < end && (*t & 0x80)) t++;
553 u = PyUnicode_DecodeUTF8(s, t - s, errors);
554 if(!u) goto failed;
555
556 /* Recode them in target encoding. */
557 w = PyUnicode_AsEncodedString(
558 u, recode_encoding, errors);
559 Py_DECREF(u);
560 if (!w) goto failed;
561
562 /* Append bytes to output buffer. */
563 r = PyString_AsString(w);
564 rn = PyString_Size(w);
565 memcpy(p, r, rn);
566 p += rn;
567 Py_DECREF(w);
568 s = t;
569 } else {
570 *p++ = *s++;
571 }
572#else
573 *p++ = *s++;
574#endif
575 continue;
576 }
577 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000578 if (s==end) {
579 PyErr_SetString(PyExc_ValueError,
580 "Trailing \\ in string");
581 goto failed;
582 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000583 switch (*s++) {
584 /* XXX This assumes ASCII! */
585 case '\n': break;
586 case '\\': *p++ = '\\'; break;
587 case '\'': *p++ = '\''; break;
588 case '\"': *p++ = '\"'; break;
589 case 'b': *p++ = '\b'; break;
590 case 'f': *p++ = '\014'; break; /* FF */
591 case 't': *p++ = '\t'; break;
592 case 'n': *p++ = '\n'; break;
593 case 'r': *p++ = '\r'; break;
594 case 'v': *p++ = '\013'; break; /* VT */
595 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
596 case '0': case '1': case '2': case '3':
597 case '4': case '5': case '6': case '7':
598 c = s[-1] - '0';
599 if ('0' <= *s && *s <= '7') {
600 c = (c<<3) + *s++ - '0';
601 if ('0' <= *s && *s <= '7')
602 c = (c<<3) + *s++ - '0';
603 }
604 *p++ = c;
605 break;
606 case 'x':
607 if (isxdigit(Py_CHARMASK(s[0]))
608 && isxdigit(Py_CHARMASK(s[1]))) {
609 unsigned int x = 0;
610 c = Py_CHARMASK(*s);
611 s++;
612 if (isdigit(c))
613 x = c - '0';
614 else if (islower(c))
615 x = 10 + c - 'a';
616 else
617 x = 10 + c - 'A';
618 x = x << 4;
619 c = Py_CHARMASK(*s);
620 s++;
621 if (isdigit(c))
622 x += c - '0';
623 else if (islower(c))
624 x += 10 + c - 'a';
625 else
626 x += 10 + c - 'A';
627 *p++ = x;
628 break;
629 }
630 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 PyErr_SetString(PyExc_ValueError,
632 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000633 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 }
635 if (strcmp(errors, "replace") == 0) {
636 *p++ = '?';
637 } else if (strcmp(errors, "ignore") == 0)
638 /* do nothing */;
639 else {
640 PyErr_Format(PyExc_ValueError,
641 "decoding error; "
642 "unknown error handling code: %.400s",
643 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000644 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000645 }
646#ifndef Py_USING_UNICODE
647 case 'u':
648 case 'U':
649 case 'N':
650 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "Unicode escapes not legal "
653 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656#endif
657 default:
658 *p++ = '\\';
659 *p++ = s[-1];
660 break;
661 }
662 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000663 if (p-buf < newlen)
664 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 return v;
666 failed:
667 Py_DECREF(v);
668 return NULL;
669}
670
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000671static int
672string_getsize(register PyObject *op)
673{
674 char *s;
675 int len;
676 if (PyString_AsStringAndSize(op, &s, &len))
677 return -1;
678 return len;
679}
680
681static /*const*/ char *
682string_getbuffer(register PyObject *op)
683{
684 char *s;
685 int len;
686 if (PyString_AsStringAndSize(op, &s, &len))
687 return NULL;
688 return s;
689}
690
Guido van Rossumd7047b31995-01-02 19:07:15 +0000691int
Fred Drakeba096332000-07-09 07:04:36 +0000692PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000694 if (!PyString_Check(op))
695 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000696 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
699/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000700PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (!PyString_Check(op))
703 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000704 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000705}
706
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707int
708PyString_AsStringAndSize(register PyObject *obj,
709 register char **s,
710 register int *len)
711{
712 if (s == NULL) {
713 PyErr_BadInternalCall();
714 return -1;
715 }
716
717 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000718#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719 if (PyUnicode_Check(obj)) {
720 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
721 if (obj == NULL)
722 return -1;
723 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000724 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000725#endif
726 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000727 PyErr_Format(PyExc_TypeError,
728 "expected string or Unicode object, "
729 "%.200s found", obj->ob_type->tp_name);
730 return -1;
731 }
732 }
733
734 *s = PyString_AS_STRING(obj);
735 if (len != NULL)
736 *len = PyString_GET_SIZE(obj);
737 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
738 PyErr_SetString(PyExc_TypeError,
739 "expected string without null bytes");
740 return -1;
741 }
742 return 0;
743}
744
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745/* Methods */
746
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000747static int
Fred Drakeba096332000-07-09 07:04:36 +0000748string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749{
750 int i;
751 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000752 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000753
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000754 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000755 if (! PyString_CheckExact(op)) {
756 int ret;
757 /* A str subclass may have its own __str__ method. */
758 op = (PyStringObject *) PyObject_Str((PyObject *)op);
759 if (op == NULL)
760 return -1;
761 ret = string_print(op, fp, flags);
762 Py_DECREF(op);
763 return ret;
764 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000765 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000766 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000767 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000769
Thomas Wouters7e474022000-07-16 12:04:32 +0000770 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000772 if (memchr(op->ob_sval, '\'', op->ob_size) &&
773 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000774 quote = '"';
775
776 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 for (i = 0; i < op->ob_size; i++) {
778 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000779 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000781 else if (c == '\t')
782 fprintf(fp, "\\t");
783 else if (c == '\n')
784 fprintf(fp, "\\n");
785 else if (c == '\r')
786 fprintf(fp, "\\r");
787 else if (c < ' ' || c >= 0x7f)
788 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000793 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794}
795
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000796PyObject *
797PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000799 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000800 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
801 PyObject *v;
802 if (newsize > INT_MAX) {
803 PyErr_SetString(PyExc_OverflowError,
804 "string is too large to make repr");
805 }
806 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000808 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 }
810 else {
811 register int i;
812 register char c;
813 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814 int quote;
815
Thomas Wouters7e474022000-07-16 12:04:32 +0000816 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000818 if (smartquotes &&
819 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000820 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000821 quote = '"';
822
Tim Peters9161c8b2001-12-03 01:55:38 +0000823 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000826 /* There's at least enough room for a hex escape
827 and a closing quote. */
828 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000832 else if (c == '\t')
833 *p++ = '\\', *p++ = 't';
834 else if (c == '\n')
835 *p++ = '\\', *p++ = 'n';
836 else if (c == '\r')
837 *p++ = '\\', *p++ = 'r';
838 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000839 /* For performance, we don't want to call
840 PyOS_snprintf here (extra layers of
841 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 sprintf(p, "\\x%02x", c & 0xff);
843 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 }
845 else
846 *p++ = c;
847 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000848 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000851 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000852 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000853 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855}
856
Guido van Rossum189f1df2001-05-01 16:51:53 +0000857static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000858string_repr(PyObject *op)
859{
860 return PyString_Repr(op, 1);
861}
862
863static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000864string_str(PyObject *s)
865{
Tim Petersc9933152001-10-16 20:18:24 +0000866 assert(PyString_Check(s));
867 if (PyString_CheckExact(s)) {
868 Py_INCREF(s);
869 return s;
870 }
871 else {
872 /* Subtype -- return genuine string with the same value. */
873 PyStringObject *t = (PyStringObject *) s;
874 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
875 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000876}
877
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878static int
Fred Drakeba096332000-07-09 07:04:36 +0000879string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880{
881 return a->ob_size;
882}
883
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000884static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000885string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000888 register PyStringObject *op;
889 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000890#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 if (PyUnicode_Check(bb))
892 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000893#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000894 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000895 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000896 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 return NULL;
898 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000899#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000901 if ((a->ob_size == 0 || b->ob_size == 0) &&
902 PyString_CheckExact(a) && PyString_CheckExact(b)) {
903 if (a->ob_size == 0) {
904 Py_INCREF(bb);
905 return bb;
906 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 Py_INCREF(a);
908 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
910 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000911 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000912 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000913 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000914 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000916 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000917 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000918 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000919 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
920 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
921 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923#undef b
924}
925
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000927string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928{
929 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000930 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000932 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933 if (n < 0)
934 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000935 /* watch out for overflows: the size can overflow int,
936 * and the # of bytes needed can overflow size_t
937 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000938 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000939 if (n && size / n != a->ob_size) {
940 PyErr_SetString(PyExc_OverflowError,
941 "repeated string is too long");
942 return NULL;
943 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000944 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 Py_INCREF(a);
946 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000947 }
Tim Peters8f422462000-09-09 06:13:41 +0000948 nbytes = size * sizeof(char);
949 if (nbytes / sizeof(char) != (size_t)size ||
950 nbytes + sizeof(PyStringObject) <= nbytes) {
951 PyErr_SetString(PyExc_OverflowError,
952 "repeated string is too long");
953 return NULL;
954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000956 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000957 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000959 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000960 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000961 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000962 for (i = 0; i < size; i += a->ob_size)
963 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
964 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000966}
967
968/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
969
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000971string_slice(register PyStringObject *a, register int i, register int j)
972 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973{
974 if (i < 0)
975 i = 0;
976 if (j < 0)
977 j = 0; /* Avoid signed/unsigned bug in next line */
978 if (j > a->ob_size)
979 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000980 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
981 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982 Py_INCREF(a);
983 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984 }
985 if (j < i)
986 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988}
989
Guido van Rossum9284a572000-03-07 15:53:43 +0000990static int
Fred Drakeba096332000-07-09 07:04:36 +0000991string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000992{
Barry Warsaw817918c2002-08-06 16:58:21 +0000993 const char *lhs, *rhs, *end;
994 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000995
996 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000997#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000998 if (PyUnicode_Check(el))
999 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001000#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001001 if (!PyString_Check(el)) {
1002 PyErr_SetString(PyExc_TypeError,
1003 "'in <string>' requires string as left operand");
1004 return -1;
1005 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001006 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001007 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001008 rhs = PyString_AS_STRING(el);
1009 lhs = PyString_AS_STRING(a);
1010
1011 /* optimize for a single character */
1012 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001013 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001014
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001015 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001016 while (lhs <= end) {
1017 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001018 return 1;
1019 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001020
Guido van Rossum9284a572000-03-07 15:53:43 +00001021 return 0;
1022}
1023
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001024static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001025string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001026{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001028 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001029 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001030 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001031 return NULL;
1032 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001033 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001034 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001035 if (v == NULL)
1036 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001037 else {
1038#ifdef COUNT_ALLOCS
1039 one_strings++;
1040#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001041 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001042 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001043 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001044}
1045
Martin v. Löwiscd353062001-05-24 16:56:35 +00001046static PyObject*
1047string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001049 int c;
1050 int len_a, len_b;
1051 int min_len;
1052 PyObject *result;
1053
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001054 /* Make sure both arguments are strings. */
1055 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001056 result = Py_NotImplemented;
1057 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001058 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001059 if (a == b) {
1060 switch (op) {
1061 case Py_EQ:case Py_LE:case Py_GE:
1062 result = Py_True;
1063 goto out;
1064 case Py_NE:case Py_LT:case Py_GT:
1065 result = Py_False;
1066 goto out;
1067 }
1068 }
1069 if (op == Py_EQ) {
1070 /* Supporting Py_NE here as well does not save
1071 much time, since Py_NE is rarely used. */
1072 if (a->ob_size == b->ob_size
1073 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001074 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001075 a->ob_size) == 0)) {
1076 result = Py_True;
1077 } else {
1078 result = Py_False;
1079 }
1080 goto out;
1081 }
1082 len_a = a->ob_size; len_b = b->ob_size;
1083 min_len = (len_a < len_b) ? len_a : len_b;
1084 if (min_len > 0) {
1085 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1086 if (c==0)
1087 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1088 }else
1089 c = 0;
1090 if (c == 0)
1091 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1092 switch (op) {
1093 case Py_LT: c = c < 0; break;
1094 case Py_LE: c = c <= 0; break;
1095 case Py_EQ: assert(0); break; /* unreachable */
1096 case Py_NE: c = c != 0; break;
1097 case Py_GT: c = c > 0; break;
1098 case Py_GE: c = c >= 0; break;
1099 default:
1100 result = Py_NotImplemented;
1101 goto out;
1102 }
1103 result = c ? Py_True : Py_False;
1104 out:
1105 Py_INCREF(result);
1106 return result;
1107}
1108
1109int
1110_PyString_Eq(PyObject *o1, PyObject *o2)
1111{
1112 PyStringObject *a, *b;
1113 a = (PyStringObject*)o1;
1114 b = (PyStringObject*)o2;
1115 return a->ob_size == b->ob_size
1116 && *a->ob_sval == *b->ob_sval
1117 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001118}
1119
Guido van Rossum9bfef441993-03-29 10:43:31 +00001120static long
Fred Drakeba096332000-07-09 07:04:36 +00001121string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001122{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001123 register int len;
1124 register unsigned char *p;
1125 register long x;
1126
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001127 if (a->ob_shash != -1)
1128 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 len = a->ob_size;
1130 p = (unsigned char *) a->ob_sval;
1131 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001132 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001133 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001134 x ^= a->ob_size;
1135 if (x == -1)
1136 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001137 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001138 return x;
1139}
1140
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001141static PyObject*
1142string_subscript(PyStringObject* self, PyObject* item)
1143{
1144 if (PyInt_Check(item)) {
1145 long i = PyInt_AS_LONG(item);
1146 if (i < 0)
1147 i += PyString_GET_SIZE(self);
1148 return string_item(self,i);
1149 }
1150 else if (PyLong_Check(item)) {
1151 long i = PyLong_AsLong(item);
1152 if (i == -1 && PyErr_Occurred())
1153 return NULL;
1154 if (i < 0)
1155 i += PyString_GET_SIZE(self);
1156 return string_item(self,i);
1157 }
1158 else if (PySlice_Check(item)) {
1159 int start, stop, step, slicelength, cur, i;
1160 char* source_buf;
1161 char* result_buf;
1162 PyObject* result;
1163
1164 if (PySlice_GetIndicesEx((PySliceObject*)item,
1165 PyString_GET_SIZE(self),
1166 &start, &stop, &step, &slicelength) < 0) {
1167 return NULL;
1168 }
1169
1170 if (slicelength <= 0) {
1171 return PyString_FromStringAndSize("", 0);
1172 }
1173 else {
1174 source_buf = PyString_AsString((PyObject*)self);
1175 result_buf = PyMem_Malloc(slicelength);
1176
1177 for (cur = start, i = 0; i < slicelength;
1178 cur += step, i++) {
1179 result_buf[i] = source_buf[cur];
1180 }
1181
1182 result = PyString_FromStringAndSize(result_buf,
1183 slicelength);
1184 PyMem_Free(result_buf);
1185 return result;
1186 }
1187 }
1188 else {
1189 PyErr_SetString(PyExc_TypeError,
1190 "string indices must be integers");
1191 return NULL;
1192 }
1193}
1194
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001195static int
Fred Drakeba096332000-07-09 07:04:36 +00001196string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001197{
1198 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001199 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001200 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001201 return -1;
1202 }
1203 *ptr = (void *)self->ob_sval;
1204 return self->ob_size;
1205}
1206
1207static int
Fred Drakeba096332000-07-09 07:04:36 +00001208string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001209{
Guido van Rossum045e6881997-09-08 18:30:11 +00001210 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001211 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212 return -1;
1213}
1214
1215static int
Fred Drakeba096332000-07-09 07:04:36 +00001216string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001217{
1218 if ( lenp )
1219 *lenp = self->ob_size;
1220 return 1;
1221}
1222
Guido van Rossum1db70701998-10-08 02:18:52 +00001223static int
Fred Drakeba096332000-07-09 07:04:36 +00001224string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001225{
1226 if ( index != 0 ) {
1227 PyErr_SetString(PyExc_SystemError,
1228 "accessing non-existent string segment");
1229 return -1;
1230 }
1231 *ptr = self->ob_sval;
1232 return self->ob_size;
1233}
1234
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001235static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001236 (inquiry)string_length, /*sq_length*/
1237 (binaryfunc)string_concat, /*sq_concat*/
1238 (intargfunc)string_repeat, /*sq_repeat*/
1239 (intargfunc)string_item, /*sq_item*/
1240 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001241 0, /*sq_ass_item*/
1242 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001243 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001244};
1245
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001246static PyMappingMethods string_as_mapping = {
1247 (inquiry)string_length,
1248 (binaryfunc)string_subscript,
1249 0,
1250};
1251
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252static PyBufferProcs string_as_buffer = {
1253 (getreadbufferproc)string_buffer_getreadbuf,
1254 (getwritebufferproc)string_buffer_getwritebuf,
1255 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001256 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257};
1258
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001259
1260
/* Selectors for the three strip variants; also indices into stripformat. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, one per selector above and in the same order. */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Bare method name for selector i: the format string past its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270
1271static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001272split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001274 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001275 PyObject* item;
1276 PyObject *list = PyList_New(0);
1277
1278 if (list == NULL)
1279 return NULL;
1280
Guido van Rossum4c08d552000-03-10 22:55:18 +00001281 for (i = j = 0; i < len; ) {
1282 while (i < len && isspace(Py_CHARMASK(s[i])))
1283 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001285 while (i < len && !isspace(Py_CHARMASK(s[i])))
1286 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001287 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001288 if (maxsplit-- <= 0)
1289 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1291 if (item == NULL)
1292 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293 err = PyList_Append(list, item);
1294 Py_DECREF(item);
1295 if (err < 0)
1296 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001297 while (i < len && isspace(Py_CHARMASK(s[i])))
1298 i++;
1299 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300 }
1301 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001302 if (j < len) {
1303 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1304 if (item == NULL)
1305 goto finally;
1306 err = PyList_Append(list, item);
1307 Py_DECREF(item);
1308 if (err < 0)
1309 goto finally;
1310 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311 return list;
1312 finally:
1313 Py_DECREF(list);
1314 return NULL;
1315}
1316
1317
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001318PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319"S.split([sep [,maxsplit]]) -> list of strings\n\
1320\n\
1321Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001323splits are done. If sep is not specified or is None, any\n\
1324whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325
1326static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001327string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328{
1329 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 int maxsplit = -1;
1331 const char *s = PyString_AS_STRING(self), *sub;
1332 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333
Guido van Rossum4c08d552000-03-10 22:55:18 +00001334 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 if (maxsplit < 0)
1337 maxsplit = INT_MAX;
1338 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001340 if (PyString_Check(subobj)) {
1341 sub = PyString_AS_STRING(subobj);
1342 n = PyString_GET_SIZE(subobj);
1343 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001344#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 else if (PyUnicode_Check(subobj))
1346 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001347#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1349 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 if (n == 0) {
1351 PyErr_SetString(PyExc_ValueError, "empty separator");
1352 return NULL;
1353 }
1354
1355 list = PyList_New(0);
1356 if (list == NULL)
1357 return NULL;
1358
1359 i = j = 0;
1360 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001361 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001362 if (maxsplit-- <= 0)
1363 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1365 if (item == NULL)
1366 goto fail;
1367 err = PyList_Append(list, item);
1368 Py_DECREF(item);
1369 if (err < 0)
1370 goto fail;
1371 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372 }
1373 else
1374 i++;
1375 }
1376 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1377 if (item == NULL)
1378 goto fail;
1379 err = PyList_Append(list, item);
1380 Py_DECREF(item);
1381 if (err < 0)
1382 goto fail;
1383
1384 return list;
1385
1386 fail:
1387 Py_DECREF(list);
1388 return NULL;
1389}
1390
1391
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001392PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393"S.join(sequence) -> string\n\
1394\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001395Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001396sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001397
1398static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001399string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400{
1401 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001402 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404 char *p;
1405 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001406 size_t sz = 0;
1407 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001408 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409
Tim Peters19fe14e2001-01-19 03:03:47 +00001410 seq = PySequence_Fast(orig, "");
1411 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001412 if (PyErr_ExceptionMatches(PyExc_TypeError))
1413 PyErr_Format(PyExc_TypeError,
1414 "sequence expected, %.80s found",
1415 orig->ob_type->tp_name);
1416 return NULL;
1417 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001418
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001419 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001420 if (seqlen == 0) {
1421 Py_DECREF(seq);
1422 return PyString_FromString("");
1423 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001425 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001426 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1427 PyErr_Format(PyExc_TypeError,
1428 "sequence item 0: expected string,"
1429 " %.80s found",
1430 item->ob_type->tp_name);
1431 Py_DECREF(seq);
1432 return NULL;
1433 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001434 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001435 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001436 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001438
Tim Peters19fe14e2001-01-19 03:03:47 +00001439 /* There are at least two things to join. Do a pre-pass to figure out
1440 * the total amount of space we'll need (sz), see whether any argument
1441 * is absurd, and defer to the Unicode join if appropriate.
1442 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001443 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001444 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001445 item = PySequence_Fast_GET_ITEM(seq, i);
1446 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001447#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001448 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001449 /* Defer to Unicode join.
1450 * CAUTION: There's no gurantee that the
1451 * original sequence can be iterated over
1452 * again, so we must pass seq here.
1453 */
1454 PyObject *result;
1455 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001456 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001457 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001458 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001459#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001460 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001461 "sequence item %i: expected string,"
1462 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001463 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001464 Py_DECREF(seq);
1465 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001466 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001467 sz += PyString_GET_SIZE(item);
1468 if (i != 0)
1469 sz += seplen;
1470 if (sz < old_sz || sz > INT_MAX) {
1471 PyErr_SetString(PyExc_OverflowError,
1472 "join() is too long for a Python string");
1473 Py_DECREF(seq);
1474 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001476 }
1477
1478 /* Allocate result space. */
1479 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1480 if (res == NULL) {
1481 Py_DECREF(seq);
1482 return NULL;
1483 }
1484
1485 /* Catenate everything. */
1486 p = PyString_AS_STRING(res);
1487 for (i = 0; i < seqlen; ++i) {
1488 size_t n;
1489 item = PySequence_Fast_GET_ITEM(seq, i);
1490 n = PyString_GET_SIZE(item);
1491 memcpy(p, PyString_AS_STRING(item), n);
1492 p += n;
1493 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001494 memcpy(p, sep, seplen);
1495 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001496 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001498
Jeremy Hylton49048292000-07-11 03:28:17 +00001499 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501}
1502
Tim Peters52e155e2001-06-16 05:42:57 +00001503PyObject *
1504_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001505{
Tim Petersa7259592001-06-16 05:11:17 +00001506 assert(sep != NULL && PyString_Check(sep));
1507 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001508 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001509}
1510
/* Normalise a start/end index pair, in place, against a length `len`.

   *end is capped at len; a negative *end or *start has len added to it and
   is then floored at 0.  *start is not capped at len. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int hi, lo;

	hi = *end;
	if (hi > len)
		hi = len;
	else if (hi < 0)
		hi += len;		/* count from the end */
	if (hi < 0)
		hi = 0;
	*end = hi;

	lo = *start;
	if (lo < 0)
		lo += len;		/* count from the end */
	if (lo < 0)
		lo = 0;
	*start = lo;
}
1525
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526static long
Fred Drakeba096332000-07-09 07:04:36 +00001527string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001529 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530 int len = PyString_GET_SIZE(self);
1531 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001532 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001534 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001535 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001536 return -2;
1537 if (PyString_Check(subobj)) {
1538 sub = PyString_AS_STRING(subobj);
1539 n = PyString_GET_SIZE(subobj);
1540 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001541#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001542 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001543 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001544#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001545 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546 return -2;
1547
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001548 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549
Guido van Rossum4c08d552000-03-10 22:55:18 +00001550 if (dir > 0) {
1551 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001553 last -= n;
1554 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001555 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001556 return (long)i;
1557 }
1558 else {
1559 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001560
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561 if (n == 0 && i <= last)
1562 return (long)last;
1563 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001564 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 return (long)j;
1566 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001567
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568 return -1;
1569}
1570
1571
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001572PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573"S.find(sub [,start [,end]]) -> int\n\
1574\n\
1575Return the lowest index in S where substring sub is found,\n\
1576such that sub is contained within s[start,end]. Optional\n\
1577arguments start and end are interpreted as in slice notation.\n\
1578\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001579Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001580
1581static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001582string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 if (result == -2)
1586 return NULL;
1587 return PyInt_FromLong(result);
1588}
1589
1590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592"S.index(sub [,start [,end]]) -> int\n\
1593\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001594Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595
1596static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001597string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001599 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600 if (result == -2)
1601 return NULL;
1602 if (result == -1) {
1603 PyErr_SetString(PyExc_ValueError,
1604 "substring not found in string.index");
1605 return NULL;
1606 }
1607 return PyInt_FromLong(result);
1608}
1609
1610
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001611PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612"S.rfind(sub [,start [,end]]) -> int\n\
1613\n\
1614Return the highest index in S where substring sub is found,\n\
1615such that sub is contained within s[start,end]. Optional\n\
1616arguments start and end are interpreted as in slice notation.\n\
1617\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001618Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619
1620static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001621string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001623 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 if (result == -2)
1625 return NULL;
1626 return PyInt_FromLong(result);
1627}
1628
1629
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001630PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631"S.rindex(sub [,start [,end]]) -> int\n\
1632\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001633Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634
1635static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001636string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001638 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639 if (result == -2)
1640 return NULL;
1641 if (result == -1) {
1642 PyErr_SetString(PyExc_ValueError,
1643 "substring not found in string.rindex");
1644 return NULL;
1645 }
1646 return PyInt_FromLong(result);
1647}
1648
1649
1650static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001651do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1652{
1653 char *s = PyString_AS_STRING(self);
1654 int len = PyString_GET_SIZE(self);
1655 char *sep = PyString_AS_STRING(sepobj);
1656 int seplen = PyString_GET_SIZE(sepobj);
1657 int i, j;
1658
1659 i = 0;
1660 if (striptype != RIGHTSTRIP) {
1661 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1662 i++;
1663 }
1664 }
1665
1666 j = len;
1667 if (striptype != LEFTSTRIP) {
1668 do {
1669 j--;
1670 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1671 j++;
1672 }
1673
1674 if (i == 0 && j == len && PyString_CheckExact(self)) {
1675 Py_INCREF(self);
1676 return (PyObject*)self;
1677 }
1678 else
1679 return PyString_FromStringAndSize(s+i, j-i);
1680}
1681
1682
1683static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001684do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001685{
1686 char *s = PyString_AS_STRING(self);
1687 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689 i = 0;
1690 if (striptype != RIGHTSTRIP) {
1691 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1692 i++;
1693 }
1694 }
1695
1696 j = len;
1697 if (striptype != LEFTSTRIP) {
1698 do {
1699 j--;
1700 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1701 j++;
1702 }
1703
Tim Peters8fa5dd02001-09-12 02:18:30 +00001704 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 Py_INCREF(self);
1706 return (PyObject*)self;
1707 }
1708 else
1709 return PyString_FromStringAndSize(s+i, j-i);
1710}
1711
1712
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001713static PyObject *
1714do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1715{
1716 PyObject *sep = NULL;
1717
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001718 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001719 return NULL;
1720
1721 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001722 if (PyString_Check(sep))
1723 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001724#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001725 else if (PyUnicode_Check(sep)) {
1726 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1727 PyObject *res;
1728 if (uniself==NULL)
1729 return NULL;
1730 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1731 striptype, sep);
1732 Py_DECREF(uniself);
1733 return res;
1734 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001735#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001736 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001737 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001738#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001739 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001740#else
1741 "%s arg must be None or str",
1742#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001743 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001744 return NULL;
1745 }
1746 return do_xstrip(self, striptype, sep);
1747 }
1748
1749 return do_strip(self, striptype);
1750}
1751
1752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001753PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001754"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755\n\
1756Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001757whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001758If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001759If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760
1761static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001762string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001764 if (PyTuple_GET_SIZE(args) == 0)
1765 return do_strip(self, BOTHSTRIP); /* Common case */
1766 else
1767 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768}
1769
1770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001772"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001774Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001775If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001776If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777
1778static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001779string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001781 if (PyTuple_GET_SIZE(args) == 0)
1782 return do_strip(self, LEFTSTRIP); /* Common case */
1783 else
1784 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785}
1786
1787
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001788PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001789"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001791Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001792If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001793If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794
1795static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001796string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001798 if (PyTuple_GET_SIZE(args) == 0)
1799 return do_strip(self, RIGHTSTRIP); /* Common case */
1800 else
1801 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802}
1803
1804
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001805PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806"S.lower() -> string\n\
1807\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001808Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809
1810static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001811string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812{
1813 char *s = PyString_AS_STRING(self), *s_new;
1814 int i, n = PyString_GET_SIZE(self);
1815 PyObject *new;
1816
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817 new = PyString_FromStringAndSize(NULL, n);
1818 if (new == NULL)
1819 return NULL;
1820 s_new = PyString_AsString(new);
1821 for (i = 0; i < n; i++) {
1822 int c = Py_CHARMASK(*s++);
1823 if (isupper(c)) {
1824 *s_new = tolower(c);
1825 } else
1826 *s_new = c;
1827 s_new++;
1828 }
1829 return new;
1830}
1831
1832
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001833PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834"S.upper() -> string\n\
1835\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001836Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837
1838static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001839string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840{
1841 char *s = PyString_AS_STRING(self), *s_new;
1842 int i, n = PyString_GET_SIZE(self);
1843 PyObject *new;
1844
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845 new = PyString_FromStringAndSize(NULL, n);
1846 if (new == NULL)
1847 return NULL;
1848 s_new = PyString_AsString(new);
1849 for (i = 0; i < n; i++) {
1850 int c = Py_CHARMASK(*s++);
1851 if (islower(c)) {
1852 *s_new = toupper(c);
1853 } else
1854 *s_new = c;
1855 s_new++;
1856 }
1857 return new;
1858}
1859
1860
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001861PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001862"S.title() -> string\n\
1863\n\
1864Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001865characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866
1867static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001868string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001869{
1870 char *s = PyString_AS_STRING(self), *s_new;
1871 int i, n = PyString_GET_SIZE(self);
1872 int previous_is_cased = 0;
1873 PyObject *new;
1874
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875 new = PyString_FromStringAndSize(NULL, n);
1876 if (new == NULL)
1877 return NULL;
1878 s_new = PyString_AsString(new);
1879 for (i = 0; i < n; i++) {
1880 int c = Py_CHARMASK(*s++);
1881 if (islower(c)) {
1882 if (!previous_is_cased)
1883 c = toupper(c);
1884 previous_is_cased = 1;
1885 } else if (isupper(c)) {
1886 if (previous_is_cased)
1887 c = tolower(c);
1888 previous_is_cased = 1;
1889 } else
1890 previous_is_cased = 0;
1891 *s_new++ = c;
1892 }
1893 return new;
1894}
1895
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001896PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897"S.capitalize() -> string\n\
1898\n\
1899Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001900capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901
1902static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001903string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904{
1905 char *s = PyString_AS_STRING(self), *s_new;
1906 int i, n = PyString_GET_SIZE(self);
1907 PyObject *new;
1908
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 new = PyString_FromStringAndSize(NULL, n);
1910 if (new == NULL)
1911 return NULL;
1912 s_new = PyString_AsString(new);
1913 if (0 < n) {
1914 int c = Py_CHARMASK(*s++);
1915 if (islower(c))
1916 *s_new = toupper(c);
1917 else
1918 *s_new = c;
1919 s_new++;
1920 }
1921 for (i = 1; i < n; i++) {
1922 int c = Py_CHARMASK(*s++);
1923 if (isupper(c))
1924 *s_new = tolower(c);
1925 else
1926 *s_new = c;
1927 s_new++;
1928 }
1929 return new;
1930}
1931
1932
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001933PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934"S.count(sub[, start[, end]]) -> int\n\
1935\n\
1936Return the number of occurrences of substring sub in string\n\
1937S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001938interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939
1940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001941string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 int len = PyString_GET_SIZE(self), n;
1945 int i = 0, last = INT_MAX;
1946 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001947 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948
Guido van Rossumc6821402000-05-08 14:08:05 +00001949 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1950 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001952
Guido van Rossum4c08d552000-03-10 22:55:18 +00001953 if (PyString_Check(subobj)) {
1954 sub = PyString_AS_STRING(subobj);
1955 n = PyString_GET_SIZE(subobj);
1956 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001957#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001958 else if (PyUnicode_Check(subobj)) {
1959 int count;
1960 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1961 if (count == -1)
1962 return NULL;
1963 else
1964 return PyInt_FromLong((long) count);
1965 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001966#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001967 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1968 return NULL;
1969
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001970 string_adjust_indices(&i, &last, len);
1971
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 m = last + 1 - n;
1973 if (n == 0)
1974 return PyInt_FromLong((long) (m-i));
1975
1976 r = 0;
1977 while (i < m) {
1978 if (!memcmp(s+i, sub, n)) {
1979 r++;
1980 i += n;
1981 } else {
1982 i++;
1983 }
1984 }
1985 return PyInt_FromLong((long) r);
1986}
1987
1988
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001989PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990"S.swapcase() -> string\n\
1991\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001992Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001993converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994
1995static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001996string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997{
1998 char *s = PyString_AS_STRING(self), *s_new;
1999 int i, n = PyString_GET_SIZE(self);
2000 PyObject *new;
2001
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002 new = PyString_FromStringAndSize(NULL, n);
2003 if (new == NULL)
2004 return NULL;
2005 s_new = PyString_AsString(new);
2006 for (i = 0; i < n; i++) {
2007 int c = Py_CHARMASK(*s++);
2008 if (islower(c)) {
2009 *s_new = toupper(c);
2010 }
2011 else if (isupper(c)) {
2012 *s_new = tolower(c);
2013 }
2014 else
2015 *s_new = c;
2016 s_new++;
2017 }
2018 return new;
2019}
2020
2021
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002022PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023"S.translate(table [,deletechars]) -> string\n\
2024\n\
2025Return a copy of the string S, where all characters occurring\n\
2026in the optional argument deletechars are removed, and the\n\
2027remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002028translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029
2030static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002031string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002033 register char *input, *output;
2034 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035 register int i, c, changed = 0;
2036 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038 int inlen, tablen, dellen = 0;
2039 PyObject *result;
2040 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002041 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 if (!PyArg_ParseTuple(args, "O|O:translate",
2044 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002046
2047 if (PyString_Check(tableobj)) {
2048 table1 = PyString_AS_STRING(tableobj);
2049 tablen = PyString_GET_SIZE(tableobj);
2050 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002051#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002053 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002054 parameter; instead a mapping to None will cause characters
2055 to be deleted. */
2056 if (delobj != NULL) {
2057 PyErr_SetString(PyExc_TypeError,
2058 "deletions are implemented differently for unicode");
2059 return NULL;
2060 }
2061 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2062 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002063#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002064 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002066
2067 if (delobj != NULL) {
2068 if (PyString_Check(delobj)) {
2069 del_table = PyString_AS_STRING(delobj);
2070 dellen = PyString_GET_SIZE(delobj);
2071 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002072#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002073 else if (PyUnicode_Check(delobj)) {
2074 PyErr_SetString(PyExc_TypeError,
2075 "deletions are implemented differently for unicode");
2076 return NULL;
2077 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002078#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002079 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2080 return NULL;
2081
2082 if (tablen != 256) {
2083 PyErr_SetString(PyExc_ValueError,
2084 "translation table must be 256 characters long");
2085 return NULL;
2086 }
2087 }
2088 else {
2089 del_table = NULL;
2090 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091 }
2092
2093 table = table1;
2094 inlen = PyString_Size(input_obj);
2095 result = PyString_FromStringAndSize((char *)NULL, inlen);
2096 if (result == NULL)
2097 return NULL;
2098 output_start = output = PyString_AsString(result);
2099 input = PyString_AsString(input_obj);
2100
2101 if (dellen == 0) {
2102 /* If no deletions are required, use faster code */
2103 for (i = inlen; --i >= 0; ) {
2104 c = Py_CHARMASK(*input++);
2105 if (Py_CHARMASK((*output++ = table[c])) != c)
2106 changed = 1;
2107 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002108 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109 return result;
2110 Py_DECREF(result);
2111 Py_INCREF(input_obj);
2112 return input_obj;
2113 }
2114
2115 for (i = 0; i < 256; i++)
2116 trans_table[i] = Py_CHARMASK(table[i]);
2117
2118 for (i = 0; i < dellen; i++)
2119 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2120
2121 for (i = inlen; --i >= 0; ) {
2122 c = Py_CHARMASK(*input++);
2123 if (trans_table[c] != -1)
2124 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2125 continue;
2126 changed = 1;
2127 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002128 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129 Py_DECREF(result);
2130 Py_INCREF(input_obj);
2131 return input_obj;
2132 }
2133 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002134 if (inlen > 0)
2135 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136 return result;
2137}
2138
2139
2140/* What follows is used for implementing replace(). Perry Stoll. */
2141
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the index into MEM of that occurrence, or -1
  if there is none.  If PAT_LEN is greater than LEN the result is -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start in the last pat_len-1 bytes of MEM. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
2167
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match.  So mem=1111 with pat=11 gives
  2, and mem=11111 with pat=11 also gives 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    while (len >= 0) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;

        /* Skip everything up to and including this match. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
2191
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are replaced
   with SUB.  At most COUNT occurrences are replaced; a negative COUNT
   means no limit.  An empty PAT matches before every byte of STR and once
   more at the end.

   If STR is empty, if PAT and SUB are both empty, if length of PAT is
   greater than length of STR, or if there are no occurrences of PAT in STR
   (or COUNT is 0), then the original string is returned.  Otherwise, a new
   string is allocated here with PyMem_MALLOC and returned; the caller must
   release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the output length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
    if (count >= 0 && nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only positive growth can overflow: when
       delta is negative the matches removed already fit inside STR.
       Refuse before computing a wrapped length, which would lead to an
       undersized buffer and out-of-bounds writes below. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound * delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* Empty pattern: emit SUB, then one input byte, and
               repeat; after the final SUB copy the rest of STR
               unchanged. */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2289
2290
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002291PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292"S.replace (old, new[, maxsplit]) -> string\n\
2293\n\
2294Return a copy of string S with all occurrences of substring\n\
2295old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002296given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297
2298static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002299string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 const char *str = PyString_AS_STRING(self), *sub, *repl;
2302 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002303 const int len = PyString_GET_SIZE(self);
2304 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002306 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 if (!PyArg_ParseTuple(args, "OO|i:replace",
2310 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312
2313 if (PyString_Check(subobj)) {
2314 sub = PyString_AS_STRING(subobj);
2315 sub_len = PyString_GET_SIZE(subobj);
2316 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002317#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002319 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002321#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2323 return NULL;
2324
2325 if (PyString_Check(replobj)) {
2326 repl = PyString_AS_STRING(replobj);
2327 repl_len = PyString_GET_SIZE(replobj);
2328 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002329#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002330 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002331 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002332 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002333#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002334 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2335 return NULL;
2336
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 if (new_s == NULL) {
2339 PyErr_NoMemory();
2340 return NULL;
2341 }
2342 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002343 if (PyString_CheckExact(self)) {
2344 /* we're returning another reference to self */
2345 new = (PyObject*)self;
2346 Py_INCREF(new);
2347 }
2348 else {
2349 new = PyString_FromStringAndSize(str, len);
2350 if (new == NULL)
2351 return NULL;
2352 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353 }
2354 else {
2355 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002356 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 }
2358 return new;
2359}
2360
2361
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002362PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002363"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002365Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002367comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368
2369static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002370string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002372 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002373 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002375 int plen;
2376 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002377 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379
Guido van Rossumc6821402000-05-08 14:08:05 +00002380 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2381 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 return NULL;
2383 if (PyString_Check(subobj)) {
2384 prefix = PyString_AS_STRING(subobj);
2385 plen = PyString_GET_SIZE(subobj);
2386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002387#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002388 else if (PyUnicode_Check(subobj)) {
2389 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002390 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002391 subobj, start, end, -1);
2392 if (rc == -1)
2393 return NULL;
2394 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002395 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002397#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399 return NULL;
2400
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002401 string_adjust_indices(&start, &end, len);
2402
2403 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002404 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002405
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002406 if (end-start >= plen)
2407 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2408 else
2409 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410}
2411
2412
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002413PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002414"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002416Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002418comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419
2420static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002421string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425 const char* suffix;
2426 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002428 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430
Guido van Rossumc6821402000-05-08 14:08:05 +00002431 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2432 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 return NULL;
2434 if (PyString_Check(subobj)) {
2435 suffix = PyString_AS_STRING(subobj);
2436 slen = PyString_GET_SIZE(subobj);
2437 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002438#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002439 else if (PyUnicode_Check(subobj)) {
2440 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002441 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002442 subobj, start, end, +1);
2443 if (rc == -1)
2444 return NULL;
2445 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002446 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002447 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002448#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002450 return NULL;
2451
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002452 string_adjust_indices(&start, &end, len);
2453
2454 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002455 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002456
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002457 if (end-slen > start)
2458 start = end - slen;
2459 if (end-start >= slen)
2460 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2461 else
2462 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463}
2464
2465
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002466PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002467"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002468\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002469Encodes S using the codec registered for encoding. encoding defaults\n\
2470to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002471handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002472a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2473'xmlcharrefreplace' as well as any other name registered with\n\
2474codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002475
2476static PyObject *
2477string_encode(PyStringObject *self, PyObject *args)
2478{
2479 char *encoding = NULL;
2480 char *errors = NULL;
2481 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2482 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002483 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2484}
2485
2486
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002487PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002488"S.decode([encoding[,errors]]) -> object\n\
2489\n\
2490Decodes S using the codec registered for encoding. encoding defaults\n\
2491to the default encoding. errors may be given to set a different error\n\
2492handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002493a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2494as well as any other name registerd with codecs.register_error that is\n\
2495able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002496
2497static PyObject *
2498string_decode(PyStringObject *self, PyObject *args)
2499{
2500 char *encoding = NULL;
2501 char *errors = NULL;
2502 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2503 return NULL;
2504 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002505}
2506
2507
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002508PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509"S.expandtabs([tabsize]) -> string\n\
2510\n\
2511Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002512If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513
2514static PyObject*
2515string_expandtabs(PyStringObject *self, PyObject *args)
2516{
2517 const char *e, *p;
2518 char *q;
2519 int i, j;
2520 PyObject *u;
2521 int tabsize = 8;
2522
2523 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2524 return NULL;
2525
Thomas Wouters7e474022000-07-16 12:04:32 +00002526 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527 i = j = 0;
2528 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2529 for (p = PyString_AS_STRING(self); p < e; p++)
2530 if (*p == '\t') {
2531 if (tabsize > 0)
2532 j += tabsize - (j % tabsize);
2533 }
2534 else {
2535 j++;
2536 if (*p == '\n' || *p == '\r') {
2537 i += j;
2538 j = 0;
2539 }
2540 }
2541
2542 /* Second pass: create output string and fill it */
2543 u = PyString_FromStringAndSize(NULL, i + j);
2544 if (!u)
2545 return NULL;
2546
2547 j = 0;
2548 q = PyString_AS_STRING(u);
2549
2550 for (p = PyString_AS_STRING(self); p < e; p++)
2551 if (*p == '\t') {
2552 if (tabsize > 0) {
2553 i = tabsize - (j % tabsize);
2554 j += i;
2555 while (i--)
2556 *q++ = ' ';
2557 }
2558 }
2559 else {
2560 j++;
2561 *q++ = *p;
2562 if (*p == '\n' || *p == '\r')
2563 j = 0;
2564 }
2565
2566 return u;
2567}
2568
Tim Peters8fa5dd02001-09-12 02:18:30 +00002569static PyObject *
2570pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002571{
2572 PyObject *u;
2573
2574 if (left < 0)
2575 left = 0;
2576 if (right < 0)
2577 right = 0;
2578
Tim Peters8fa5dd02001-09-12 02:18:30 +00002579 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580 Py_INCREF(self);
2581 return (PyObject *)self;
2582 }
2583
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002584 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002585 left + PyString_GET_SIZE(self) + right);
2586 if (u) {
2587 if (left)
2588 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002589 memcpy(PyString_AS_STRING(u) + left,
2590 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002591 PyString_GET_SIZE(self));
2592 if (right)
2593 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2594 fill, right);
2595 }
2596
2597 return u;
2598}
2599
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002600PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002601"S.ljust(width) -> string\n"
2602"\n"
2603"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002604"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002605
2606static PyObject *
2607string_ljust(PyStringObject *self, PyObject *args)
2608{
2609 int width;
2610 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2611 return NULL;
2612
Tim Peters8fa5dd02001-09-12 02:18:30 +00002613 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614 Py_INCREF(self);
2615 return (PyObject*) self;
2616 }
2617
2618 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2619}
2620
2621
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002622PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002623"S.rjust(width) -> string\n"
2624"\n"
2625"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002626"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002627
2628static PyObject *
2629string_rjust(PyStringObject *self, PyObject *args)
2630{
2631 int width;
2632 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2633 return NULL;
2634
Tim Peters8fa5dd02001-09-12 02:18:30 +00002635 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002636 Py_INCREF(self);
2637 return (PyObject*) self;
2638 }
2639
2640 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2641}
2642
2643
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002644PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002645"S.center(width) -> string\n"
2646"\n"
2647"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002648"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002649
2650static PyObject *
2651string_center(PyStringObject *self, PyObject *args)
2652{
2653 int marg, left;
2654 int width;
2655
2656 if (!PyArg_ParseTuple(args, "i:center", &width))
2657 return NULL;
2658
Tim Peters8fa5dd02001-09-12 02:18:30 +00002659 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002660 Py_INCREF(self);
2661 return (PyObject*) self;
2662 }
2663
2664 marg = width - PyString_GET_SIZE(self);
2665 left = marg / 2 + (marg & width & 1);
2666
2667 return pad(self, left, marg - left, ' ');
2668}
2669
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002670PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002671"S.zfill(width) -> string\n"
2672"\n"
2673"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002674"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002675
2676static PyObject *
2677string_zfill(PyStringObject *self, PyObject *args)
2678{
2679 int fill;
2680 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002681 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002682
2683 int width;
2684 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2685 return NULL;
2686
2687 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002688 if (PyString_CheckExact(self)) {
2689 Py_INCREF(self);
2690 return (PyObject*) self;
2691 }
2692 else
2693 return PyString_FromStringAndSize(
2694 PyString_AS_STRING(self),
2695 PyString_GET_SIZE(self)
2696 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002697 }
2698
2699 fill = width - PyString_GET_SIZE(self);
2700
2701 s = pad(self, fill, 0, '0');
2702
2703 if (s == NULL)
2704 return NULL;
2705
2706 p = PyString_AS_STRING(s);
2707 if (p[fill] == '+' || p[fill] == '-') {
2708 /* move sign to beginning of string */
2709 p[0] = p[fill];
2710 p[fill] = '0';
2711 }
2712
2713 return (PyObject*) s;
2714}
2715
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002716PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002717"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002718"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002719"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002720"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002721
2722static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002723string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002724{
Fred Drakeba096332000-07-09 07:04:36 +00002725 register const unsigned char *p
2726 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002727 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002728
Guido van Rossum4c08d552000-03-10 22:55:18 +00002729 /* Shortcut for single character strings */
2730 if (PyString_GET_SIZE(self) == 1 &&
2731 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002732 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002733
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002734 /* Special case for empty strings */
2735 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002736 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002737
Guido van Rossum4c08d552000-03-10 22:55:18 +00002738 e = p + PyString_GET_SIZE(self);
2739 for (; p < e; p++) {
2740 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002741 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002742 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002743 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002744}
2745
2746
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002747PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002748"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002749\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002750Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002751and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002752
2753static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002754string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002755{
Fred Drakeba096332000-07-09 07:04:36 +00002756 register const unsigned char *p
2757 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002758 register const unsigned char *e;
2759
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002760 /* Shortcut for single character strings */
2761 if (PyString_GET_SIZE(self) == 1 &&
2762 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002763 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002764
2765 /* Special case for empty strings */
2766 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002767 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002768
2769 e = p + PyString_GET_SIZE(self);
2770 for (; p < e; p++) {
2771 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002772 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002773 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002774 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002775}
2776
2777
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002778PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002779"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002780\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002781Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002782and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002783
2784static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002785string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002786{
Fred Drakeba096332000-07-09 07:04:36 +00002787 register const unsigned char *p
2788 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002789 register const unsigned char *e;
2790
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002791 /* Shortcut for single character strings */
2792 if (PyString_GET_SIZE(self) == 1 &&
2793 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002794 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002795
2796 /* Special case for empty strings */
2797 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002798 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002799
2800 e = p + PyString_GET_SIZE(self);
2801 for (; p < e; p++) {
2802 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002803 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002804 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002805 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002806}
2807
2808
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002809PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002810"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002811\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002812Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002813False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002814
2815static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002816string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002817{
Fred Drakeba096332000-07-09 07:04:36 +00002818 register const unsigned char *p
2819 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002820 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002821
Guido van Rossum4c08d552000-03-10 22:55:18 +00002822 /* Shortcut for single character strings */
2823 if (PyString_GET_SIZE(self) == 1 &&
2824 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002825 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002826
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002827 /* Special case for empty strings */
2828 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002829 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002830
Guido van Rossum4c08d552000-03-10 22:55:18 +00002831 e = p + PyString_GET_SIZE(self);
2832 for (; p < e; p++) {
2833 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002834 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002835 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002836 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002837}
2838
2839
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002840PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002841"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002842\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002843Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002844at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845
2846static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002847string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002848{
Fred Drakeba096332000-07-09 07:04:36 +00002849 register const unsigned char *p
2850 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002851 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002852 int cased;
2853
Guido van Rossum4c08d552000-03-10 22:55:18 +00002854 /* Shortcut for single character strings */
2855 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002856 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002857
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002858 /* Special case for empty strings */
2859 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002860 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002861
Guido van Rossum4c08d552000-03-10 22:55:18 +00002862 e = p + PyString_GET_SIZE(self);
2863 cased = 0;
2864 for (; p < e; p++) {
2865 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002866 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867 else if (!cased && islower(*p))
2868 cased = 1;
2869 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002870 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871}
2872
2873
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002874PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002875"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002877Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002878at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002879
2880static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002881string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882{
Fred Drakeba096332000-07-09 07:04:36 +00002883 register const unsigned char *p
2884 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002885 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886 int cased;
2887
Guido van Rossum4c08d552000-03-10 22:55:18 +00002888 /* Shortcut for single character strings */
2889 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002890 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002891
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002892 /* Special case for empty strings */
2893 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002894 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002895
Guido van Rossum4c08d552000-03-10 22:55:18 +00002896 e = p + PyString_GET_SIZE(self);
2897 cased = 0;
2898 for (; p < e; p++) {
2899 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002900 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901 else if (!cased && isupper(*p))
2902 cased = 1;
2903 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002904 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002905}
2906
2907
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002908PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002909"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002910\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002911Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002912may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002913ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002914
2915static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002916string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002917{
Fred Drakeba096332000-07-09 07:04:36 +00002918 register const unsigned char *p
2919 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002920 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002921 int cased, previous_is_cased;
2922
Guido van Rossum4c08d552000-03-10 22:55:18 +00002923 /* Shortcut for single character strings */
2924 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002925 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002926
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002927 /* Special case for empty strings */
2928 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002929 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002930
Guido van Rossum4c08d552000-03-10 22:55:18 +00002931 e = p + PyString_GET_SIZE(self);
2932 cased = 0;
2933 previous_is_cased = 0;
2934 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002935 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002936
2937 if (isupper(ch)) {
2938 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002939 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002940 previous_is_cased = 1;
2941 cased = 1;
2942 }
2943 else if (islower(ch)) {
2944 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002945 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002946 previous_is_cased = 1;
2947 cased = 1;
2948 }
2949 else
2950 previous_is_cased = 0;
2951 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002952 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002953}
2954
2955
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002956PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002957"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002958\n\
2959Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002960Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002961is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002962
2963#define SPLIT_APPEND(data, left, right) \
2964 str = PyString_FromStringAndSize(data + left, right - left); \
2965 if (!str) \
2966 goto onError; \
2967 if (PyList_Append(list, str)) { \
2968 Py_DECREF(str); \
2969 goto onError; \
2970 } \
2971 else \
2972 Py_DECREF(str);
2973
2974static PyObject*
2975string_splitlines(PyStringObject *self, PyObject *args)
2976{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002977 register int i;
2978 register int j;
2979 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002980 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002981 PyObject *list;
2982 PyObject *str;
2983 char *data;
2984
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002985 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002986 return NULL;
2987
2988 data = PyString_AS_STRING(self);
2989 len = PyString_GET_SIZE(self);
2990
Guido van Rossum4c08d552000-03-10 22:55:18 +00002991 list = PyList_New(0);
2992 if (!list)
2993 goto onError;
2994
2995 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002996 int eol;
2997
Guido van Rossum4c08d552000-03-10 22:55:18 +00002998 /* Find a line and append it */
2999 while (i < len && data[i] != '\n' && data[i] != '\r')
3000 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003001
3002 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003003 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003004 if (i < len) {
3005 if (data[i] == '\r' && i + 1 < len &&
3006 data[i+1] == '\n')
3007 i += 2;
3008 else
3009 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003010 if (keepends)
3011 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003012 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003013 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003014 j = i;
3015 }
3016 if (j < len) {
3017 SPLIT_APPEND(data, j, len);
3018 }
3019
3020 return list;
3021
3022 onError:
3023 Py_DECREF(list);
3024 return NULL;
3025}
3026
3027#undef SPLIT_APPEND
3028
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003029
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003030static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003031string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003032 /* Counterparts of the obsolete stropmodule functions; except
3033 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003034 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3035 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3036 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3037 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003038 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3039 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3040 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3041 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3042 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3043 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3044 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003045 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3046 capitalize__doc__},
3047 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3048 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3049 endswith__doc__},
3050 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3051 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3052 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3053 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3054 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3055 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3056 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3057 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3058 startswith__doc__},
3059 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3060 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3061 swapcase__doc__},
3062 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3063 translate__doc__},
3064 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3065 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3066 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3067 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3068 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3069 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3070 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3071 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3072 expandtabs__doc__},
3073 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3074 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003075 {NULL, NULL} /* sentinel */
3076};
3077
Jeremy Hylton938ace62002-07-17 16:30:39 +00003078static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003079str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3080
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003081static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003082string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003083{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003084 PyObject *x = NULL;
3085 static char *kwlist[] = {"object", 0};
3086
Guido van Rossumae960af2001-08-30 03:11:59 +00003087 if (type != &PyString_Type)
3088 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003089 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3090 return NULL;
3091 if (x == NULL)
3092 return PyString_FromString("");
3093 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003094}
3095
Guido van Rossumae960af2001-08-30 03:11:59 +00003096static PyObject *
3097str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3098{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003099 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003100 int n;
3101
3102 assert(PyType_IsSubtype(type, &PyString_Type));
3103 tmp = string_new(&PyString_Type, args, kwds);
3104 if (tmp == NULL)
3105 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003106 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003107 n = PyString_GET_SIZE(tmp);
3108 pnew = type->tp_alloc(type, n);
3109 if (pnew != NULL) {
3110 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003111 ((PyStringObject *)pnew)->ob_shash =
3112 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003113 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003114 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003115 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003116 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003117}
3118
Guido van Rossumcacfc072002-05-24 19:01:59 +00003119static PyObject *
3120basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3121{
3122 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003123 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003124 return NULL;
3125}
3126
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003127PyDoc_STRVAR(basestring_doc,
3128"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003129
3130PyTypeObject PyBaseString_Type = {
3131 PyObject_HEAD_INIT(&PyType_Type)
3132 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003133 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003134 0,
3135 0,
3136 0, /* tp_dealloc */
3137 0, /* tp_print */
3138 0, /* tp_getattr */
3139 0, /* tp_setattr */
3140 0, /* tp_compare */
3141 0, /* tp_repr */
3142 0, /* tp_as_number */
3143 0, /* tp_as_sequence */
3144 0, /* tp_as_mapping */
3145 0, /* tp_hash */
3146 0, /* tp_call */
3147 0, /* tp_str */
3148 0, /* tp_getattro */
3149 0, /* tp_setattro */
3150 0, /* tp_as_buffer */
3151 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3152 basestring_doc, /* tp_doc */
3153 0, /* tp_traverse */
3154 0, /* tp_clear */
3155 0, /* tp_richcompare */
3156 0, /* tp_weaklistoffset */
3157 0, /* tp_iter */
3158 0, /* tp_iternext */
3159 0, /* tp_methods */
3160 0, /* tp_members */
3161 0, /* tp_getset */
3162 &PyBaseObject_Type, /* tp_base */
3163 0, /* tp_dict */
3164 0, /* tp_descr_get */
3165 0, /* tp_descr_set */
3166 0, /* tp_dictoffset */
3167 0, /* tp_init */
3168 0, /* tp_alloc */
3169 basestring_new, /* tp_new */
3170 0, /* tp_free */
3171};
3172
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003173PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003174"str(object) -> string\n\
3175\n\
3176Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003177If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003178
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003179PyTypeObject PyString_Type = {
3180 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003181 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003182 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003183 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003184 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003185 (destructor)string_dealloc, /* tp_dealloc */
3186 (printfunc)string_print, /* tp_print */
3187 0, /* tp_getattr */
3188 0, /* tp_setattr */
3189 0, /* tp_compare */
3190 (reprfunc)string_repr, /* tp_repr */
3191 0, /* tp_as_number */
3192 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003193 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003194 (hashfunc)string_hash, /* tp_hash */
3195 0, /* tp_call */
3196 (reprfunc)string_str, /* tp_str */
3197 PyObject_GenericGetAttr, /* tp_getattro */
3198 0, /* tp_setattro */
3199 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00003200 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003201 string_doc, /* tp_doc */
3202 0, /* tp_traverse */
3203 0, /* tp_clear */
3204 (richcmpfunc)string_richcompare, /* tp_richcompare */
3205 0, /* tp_weaklistoffset */
3206 0, /* tp_iter */
3207 0, /* tp_iternext */
3208 string_methods, /* tp_methods */
3209 0, /* tp_members */
3210 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003211 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003212 0, /* tp_dict */
3213 0, /* tp_descr_get */
3214 0, /* tp_descr_set */
3215 0, /* tp_dictoffset */
3216 0, /* tp_init */
3217 0, /* tp_alloc */
3218 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003219 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003220};
3221
3222void
Fred Drakeba096332000-07-09 07:04:36 +00003223PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003224{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003225 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003226 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003227 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003228 if (w == NULL || !PyString_Check(*pv)) {
3229 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003230 *pv = NULL;
3231 return;
3232 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003233 v = string_concat((PyStringObject *) *pv, w);
3234 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003235 *pv = v;
3236}
3237
Guido van Rossum013142a1994-08-30 08:19:36 +00003238void
Fred Drakeba096332000-07-09 07:04:36 +00003239PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003240{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003241 PyString_Concat(pv, w);
3242 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003243}
3244
3245
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003246/* The following function breaks the notion that strings are immutable:
3247 it changes the size of a string. We get away with this only if there
3248 is only one module referencing the object. You can also think of it
3249 as creating a new string object and destroying the old one, only
3250 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003251 already be known to some other part of the code...
3252 Note that if there's not enough memory to resize the string, the original
3253 string object at *pv is deallocated, *pv is set to NULL, an "out of
3254 memory" exception is set, and -1 is returned. Else (on success) 0 is
3255 returned, and the value in *pv may or may not be the same as on input.
3256 As always, an extra byte is allocated for a trailing \0 byte (newsize
3257 does *not* include that), and a trailing \0 byte is stored.
3258*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003259
3260int
Fred Drakeba096332000-07-09 07:04:36 +00003261_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003262{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003263 register PyObject *v;
3264 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003265 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003266 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003267 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003268 Py_DECREF(v);
3269 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003270 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003271 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003272 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003273 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003274 _Py_ForgetReference(v);
3275 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003276 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003277 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003278 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003279 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003280 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003281 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003282 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003283 _Py_NewReference(*pv);
3284 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003285 sv->ob_size = newsize;
3286 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003287 return 0;
3288}
Guido van Rossume5372401993-03-16 12:15:04 +00003289
3290/* Helpers for formatstring */
3291
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003292static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003293getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003294{
3295 int argidx = *p_argidx;
3296 if (argidx < arglen) {
3297 (*p_argidx)++;
3298 if (arglen < 0)
3299 return args;
3300 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003301 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003302 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 PyErr_SetString(PyExc_TypeError,
3304 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003305 return NULL;
3306}
3307
/* Bit flags recording which flag characters appeared in a format
 * specifier:
 *	F_LJUST	'-'
 *	F_SIGN	'+'
 *	F_BLANK	' '
 *	F_ALT	'#'
 *	F_ZERO	'0'
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
3320
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003321static int
Fred Drakeba096332000-07-09 07:04:36 +00003322formatfloat(char *buf, size_t buflen, int flags,
3323 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003324{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003325 /* fmt = '%#.' + `prec` + `type`
3326 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003327 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003328 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003329 x = PyFloat_AsDouble(v);
3330 if (x == -1.0 && PyErr_Occurred()) {
3331 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003332 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003333 }
Guido van Rossume5372401993-03-16 12:15:04 +00003334 if (prec < 0)
3335 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003336 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3337 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003338 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3339 (flags&F_ALT) ? "#" : "",
3340 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003341 /* worst case length calc to ensure no buffer overrun:
3342 fmt = %#.<prec>g
3343 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003344 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003345 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3346 If prec=0 the effective precision is 1 (the leading digit is
3347 always given), therefore increase by one to 10+prec. */
3348 if (buflen <= (size_t)10 + (size_t)prec) {
3349 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003350 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003351 return -1;
3352 }
Tim Peters885d4572001-11-28 20:27:42 +00003353 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003354 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003355}
3356
Tim Peters38fd5b62000-09-21 05:43:11 +00003357/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3358 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3359 * Python's regular ints.
3360 * Return value: a new PyString*, or NULL if error.
3361 * . *pbuf is set to point into it,
3362 * *plen set to the # of chars following that.
3363 * Caller must decref it when done using pbuf.
3364 * The string starting at *pbuf is of the form
3365 * "-"? ("0x" | "0X")? digit+
3366 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003367 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003368 * There will be at least prec digits, zero-filled on the left if
3369 * necessary to get that many.
3370 * val object to be converted
3371 * flags bitmask of format flags; only F_ALT is looked at
3372 * prec minimum number of digits; 0-fill on left if needed
3373 * type a character in [duoxX]; u acts the same as d
3374 *
3375 * CAUTION: o, x and X conversions on regular ints can never
3376 * produce a '-' sign, but can for Python's unbounded ints.
3377 */
3378PyObject*
3379_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3380 char **pbuf, int *plen)
3381{
3382 PyObject *result = NULL;
3383 char *buf;
3384 int i;
3385 int sign; /* 1 if '-', else 0 */
3386 int len; /* number of characters */
3387 int numdigits; /* len == numnondigits + numdigits */
3388 int numnondigits = 0;
3389
3390 switch (type) {
3391 case 'd':
3392 case 'u':
3393 result = val->ob_type->tp_str(val);
3394 break;
3395 case 'o':
3396 result = val->ob_type->tp_as_number->nb_oct(val);
3397 break;
3398 case 'x':
3399 case 'X':
3400 numnondigits = 2;
3401 result = val->ob_type->tp_as_number->nb_hex(val);
3402 break;
3403 default:
3404 assert(!"'type' not in [duoxX]");
3405 }
3406 if (!result)
3407 return NULL;
3408
3409 /* To modify the string in-place, there can only be one reference. */
3410 if (result->ob_refcnt != 1) {
3411 PyErr_BadInternalCall();
3412 return NULL;
3413 }
3414 buf = PyString_AsString(result);
3415 len = PyString_Size(result);
3416 if (buf[len-1] == 'L') {
3417 --len;
3418 buf[len] = '\0';
3419 }
3420 sign = buf[0] == '-';
3421 numnondigits += sign;
3422 numdigits = len - numnondigits;
3423 assert(numdigits > 0);
3424
Tim Petersfff53252001-04-12 18:38:48 +00003425 /* Get rid of base marker unless F_ALT */
3426 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003427 /* Need to skip 0x, 0X or 0. */
3428 int skipped = 0;
3429 switch (type) {
3430 case 'o':
3431 assert(buf[sign] == '0');
3432 /* If 0 is only digit, leave it alone. */
3433 if (numdigits > 1) {
3434 skipped = 1;
3435 --numdigits;
3436 }
3437 break;
3438 case 'x':
3439 case 'X':
3440 assert(buf[sign] == '0');
3441 assert(buf[sign + 1] == 'x');
3442 skipped = 2;
3443 numnondigits -= 2;
3444 break;
3445 }
3446 if (skipped) {
3447 buf += skipped;
3448 len -= skipped;
3449 if (sign)
3450 buf[0] = '-';
3451 }
3452 assert(len == numnondigits + numdigits);
3453 assert(numdigits > 0);
3454 }
3455
3456 /* Fill with leading zeroes to meet minimum width. */
3457 if (prec > numdigits) {
3458 PyObject *r1 = PyString_FromStringAndSize(NULL,
3459 numnondigits + prec);
3460 char *b1;
3461 if (!r1) {
3462 Py_DECREF(result);
3463 return NULL;
3464 }
3465 b1 = PyString_AS_STRING(r1);
3466 for (i = 0; i < numnondigits; ++i)
3467 *b1++ = *buf++;
3468 for (i = 0; i < prec - numdigits; i++)
3469 *b1++ = '0';
3470 for (i = 0; i < numdigits; i++)
3471 *b1++ = *buf++;
3472 *b1 = '\0';
3473 Py_DECREF(result);
3474 result = r1;
3475 buf = PyString_AS_STRING(result);
3476 len = numnondigits + prec;
3477 }
3478
3479 /* Fix up case for hex conversions. */
3480 switch (type) {
3481 case 'x':
3482 /* Need to convert all upper case letters to lower case. */
3483 for (i = 0; i < len; i++)
3484 if (buf[i] >= 'A' && buf[i] <= 'F')
3485 buf[i] += 'a'-'A';
3486 break;
3487 case 'X':
3488 /* Need to convert 0x to 0X (and -0x to -0X). */
3489 if (buf[sign + 1] == 'x')
3490 buf[sign + 1] = 'X';
3491 break;
3492 }
3493 *pbuf = buf;
3494 *plen = len;
3495 return result;
3496}
3497
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003498static int
Fred Drakeba096332000-07-09 07:04:36 +00003499formatint(char *buf, size_t buflen, int flags,
3500 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003501{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003502 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003503 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3504 + 1 + 1 = 24 */
3505 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003506 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003507
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003508 x = PyInt_AsLong(v);
3509 if (x == -1 && PyErr_Occurred()) {
3510 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003511 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003512 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003513 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003514 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003515 "%u/%o/%x/%X of negative int will return "
3516 "a signed string in Python 2.4 and up") < 0)
3517 return -1;
3518 }
Guido van Rossume5372401993-03-16 12:15:04 +00003519 if (prec < 0)
3520 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003521
3522 if ((flags & F_ALT) &&
3523 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003524 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003525 * of issues that cause pain:
3526 * - when 0 is being converted, the C standard leaves off
3527 * the '0x' or '0X', which is inconsistent with other
3528 * %#x/%#X conversions and inconsistent with Python's
3529 * hex() function
3530 * - there are platforms that violate the standard and
3531 * convert 0 with the '0x' or '0X'
3532 * (Metrowerks, Compaq Tru64)
3533 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003534 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003535 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003536 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003537 * We can achieve the desired consistency by inserting our
3538 * own '0x' or '0X' prefix, and substituting %x/%X in place
3539 * of %#x/%#X.
3540 *
3541 * Note that this is the same approach as used in
3542 * formatint() in unicodeobject.c
3543 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003544 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003545 type, prec, type);
3546 }
3547 else {
3548 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003549 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003550 prec, type);
3551 }
3552
Tim Peters38fd5b62000-09-21 05:43:11 +00003553 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003554 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3555 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003556 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003557 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003558 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003559 return -1;
3560 }
Tim Peters885d4572001-11-28 20:27:42 +00003561 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003562 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003563}
3564
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003565static int
Fred Drakeba096332000-07-09 07:04:36 +00003566formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003567{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003568 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569 if (PyString_Check(v)) {
3570 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003571 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003572 }
3573 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003574 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003575 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003576 }
3577 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003578 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003579}
3580
Guido van Rossum013142a1994-08-30 08:19:36 +00003581
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesized so the cast cannot bind to neighbouring
   tokens at the point of use.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003591
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003592PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003593PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003594{
3595 char *fmt, *res;
3596 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003597 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003598 PyObject *result, *orig_args;
3599#ifdef Py_USING_UNICODE
3600 PyObject *v, *w;
3601#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003602 PyObject *dict = NULL;
3603 if (format == NULL || !PyString_Check(format) || args == NULL) {
3604 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003605 return NULL;
3606 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003607 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003608 fmt = PyString_AS_STRING(format);
3609 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003610 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003611 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003612 if (result == NULL)
3613 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003614 res = PyString_AsString(result);
3615 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003616 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003617 argidx = 0;
3618 }
3619 else {
3620 arglen = -1;
3621 argidx = -2;
3622 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003623 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003624 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003625 while (--fmtcnt >= 0) {
3626 if (*fmt != '%') {
3627 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003628 rescnt = fmtcnt + 100;
3629 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003630 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003631 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003632 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003633 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003634 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003635 }
3636 *res++ = *fmt++;
3637 }
3638 else {
3639 /* Got a format specifier */
3640 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003641 int width = -1;
3642 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003643 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003644 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003645 PyObject *v = NULL;
3646 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003647 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003648 int sign;
3649 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003650 char formatbuf[FORMATBUFLEN];
3651 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003652#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003653 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003654 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003655#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003656
Guido van Rossumda9c2711996-12-05 21:58:58 +00003657 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003658 if (*fmt == '(') {
3659 char *keystart;
3660 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003661 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003662 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003663
3664 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003665 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003666 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003667 goto error;
3668 }
3669 ++fmt;
3670 --fmtcnt;
3671 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003672 /* Skip over balanced parentheses */
3673 while (pcount > 0 && --fmtcnt >= 0) {
3674 if (*fmt == ')')
3675 --pcount;
3676 else if (*fmt == '(')
3677 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003678 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003679 }
3680 keylen = fmt - keystart - 1;
3681 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003682 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003683 "incomplete format key");
3684 goto error;
3685 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003686 key = PyString_FromStringAndSize(keystart,
3687 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003688 if (key == NULL)
3689 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003690 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003691 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003692 args_owned = 0;
3693 }
3694 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003695 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003696 if (args == NULL) {
3697 goto error;
3698 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003699 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003700 arglen = -1;
3701 argidx = -2;
3702 }
Guido van Rossume5372401993-03-16 12:15:04 +00003703 while (--fmtcnt >= 0) {
3704 switch (c = *fmt++) {
3705 case '-': flags |= F_LJUST; continue;
3706 case '+': flags |= F_SIGN; continue;
3707 case ' ': flags |= F_BLANK; continue;
3708 case '#': flags |= F_ALT; continue;
3709 case '0': flags |= F_ZERO; continue;
3710 }
3711 break;
3712 }
3713 if (c == '*') {
3714 v = getnextarg(args, arglen, &argidx);
3715 if (v == NULL)
3716 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003717 if (!PyInt_Check(v)) {
3718 PyErr_SetString(PyExc_TypeError,
3719 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003720 goto error;
3721 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003722 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003723 if (width < 0) {
3724 flags |= F_LJUST;
3725 width = -width;
3726 }
Guido van Rossume5372401993-03-16 12:15:04 +00003727 if (--fmtcnt >= 0)
3728 c = *fmt++;
3729 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003730 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003731 width = c - '0';
3732 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003733 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003734 if (!isdigit(c))
3735 break;
3736 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003737 PyErr_SetString(
3738 PyExc_ValueError,
3739 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003740 goto error;
3741 }
3742 width = width*10 + (c - '0');
3743 }
3744 }
3745 if (c == '.') {
3746 prec = 0;
3747 if (--fmtcnt >= 0)
3748 c = *fmt++;
3749 if (c == '*') {
3750 v = getnextarg(args, arglen, &argidx);
3751 if (v == NULL)
3752 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003753 if (!PyInt_Check(v)) {
3754 PyErr_SetString(
3755 PyExc_TypeError,
3756 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003757 goto error;
3758 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003759 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003760 if (prec < 0)
3761 prec = 0;
3762 if (--fmtcnt >= 0)
3763 c = *fmt++;
3764 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003765 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003766 prec = c - '0';
3767 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003768 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003769 if (!isdigit(c))
3770 break;
3771 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003772 PyErr_SetString(
3773 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003774 "prec too big");
3775 goto error;
3776 }
3777 prec = prec*10 + (c - '0');
3778 }
3779 }
3780 } /* prec */
3781 if (fmtcnt >= 0) {
3782 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003783 if (--fmtcnt >= 0)
3784 c = *fmt++;
3785 }
3786 }
3787 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003788 PyErr_SetString(PyExc_ValueError,
3789 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003790 goto error;
3791 }
3792 if (c != '%') {
3793 v = getnextarg(args, arglen, &argidx);
3794 if (v == NULL)
3795 goto error;
3796 }
3797 sign = 0;
3798 fill = ' ';
3799 switch (c) {
3800 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003801 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003802 len = 1;
3803 break;
3804 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003805 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003806#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003807 if (PyUnicode_Check(v)) {
3808 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003809 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003810 goto unicode;
3811 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003812#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003813 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003814 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003815 else
3816 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003817 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003818 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003819 if (!PyString_Check(temp)) {
3820 PyErr_SetString(PyExc_TypeError,
3821 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003822 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003823 goto error;
3824 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003825 pbuf = PyString_AS_STRING(temp);
3826 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003827 if (prec >= 0 && len > prec)
3828 len = prec;
3829 break;
3830 case 'i':
3831 case 'd':
3832 case 'u':
3833 case 'o':
3834 case 'x':
3835 case 'X':
3836 if (c == 'i')
3837 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003838 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003839 temp = _PyString_FormatLong(v, flags,
3840 prec, c, &pbuf, &len);
3841 if (!temp)
3842 goto error;
3843 /* unbounded ints can always produce
3844 a sign character! */
3845 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003846 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003847 else {
3848 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003849 len = formatint(pbuf,
3850 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003851 flags, prec, c, v);
3852 if (len < 0)
3853 goto error;
3854 /* only d conversion is signed */
3855 sign = c == 'd';
3856 }
3857 if (flags & F_ZERO)
3858 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003859 break;
3860 case 'e':
3861 case 'E':
3862 case 'f':
3863 case 'g':
3864 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003865 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003866 len = formatfloat(pbuf, sizeof(formatbuf),
3867 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003868 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003869 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003870 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003871 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003872 fill = '0';
3873 break;
3874 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003875 pbuf = formatbuf;
3876 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003877 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003878 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003879 break;
3880 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003881 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003882 "unsupported format character '%c' (0x%x) "
3883 "at index %i",
3884 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003885 goto error;
3886 }
3887 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003888 if (*pbuf == '-' || *pbuf == '+') {
3889 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003890 len--;
3891 }
3892 else if (flags & F_SIGN)
3893 sign = '+';
3894 else if (flags & F_BLANK)
3895 sign = ' ';
3896 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003897 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003898 }
3899 if (width < len)
3900 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003901 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003902 reslen -= rescnt;
3903 rescnt = width + fmtcnt + 100;
3904 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003905 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003906 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003907 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003908 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003909 }
3910 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003911 if (fill != ' ')
3912 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003913 rescnt--;
3914 if (width > len)
3915 width--;
3916 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003917 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3918 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003919 assert(pbuf[1] == c);
3920 if (fill != ' ') {
3921 *res++ = *pbuf++;
3922 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003923 }
Tim Petersfff53252001-04-12 18:38:48 +00003924 rescnt -= 2;
3925 width -= 2;
3926 if (width < 0)
3927 width = 0;
3928 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003929 }
3930 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003931 do {
3932 --rescnt;
3933 *res++ = fill;
3934 } while (--width > len);
3935 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003936 if (fill == ' ') {
3937 if (sign)
3938 *res++ = sign;
3939 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003940 (c == 'x' || c == 'X')) {
3941 assert(pbuf[0] == '0');
3942 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003943 *res++ = *pbuf++;
3944 *res++ = *pbuf++;
3945 }
3946 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003947 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003948 res += len;
3949 rescnt -= len;
3950 while (--width >= len) {
3951 --rescnt;
3952 *res++ = ' ';
3953 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003954 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003955 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003956 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003957 goto error;
3958 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003959 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003960 } /* '%' */
3961 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003962 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003963 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003964 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003965 goto error;
3966 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003967 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003968 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003969 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003970 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003971 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003972
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003973#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003974 unicode:
3975 if (args_owned) {
3976 Py_DECREF(args);
3977 args_owned = 0;
3978 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003979 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003980 if (PyTuple_Check(orig_args) && argidx > 0) {
3981 PyObject *v;
3982 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3983 v = PyTuple_New(n);
3984 if (v == NULL)
3985 goto error;
3986 while (--n >= 0) {
3987 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3988 Py_INCREF(w);
3989 PyTuple_SET_ITEM(v, n, w);
3990 }
3991 args = v;
3992 } else {
3993 Py_INCREF(orig_args);
3994 args = orig_args;
3995 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003996 args_owned = 1;
3997 /* Take what we have of the result and let the Unicode formatting
3998 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003999 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004000 if (_PyString_Resize(&result, rescnt))
4001 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004002 fmtcnt = PyString_GET_SIZE(format) - \
4003 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004004 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4005 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004006 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004007 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004008 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004009 if (v == NULL)
4010 goto error;
4011 /* Paste what we have (result) to what the Unicode formatting
4012 function returned (v) and return the result (or error) */
4013 w = PyUnicode_Concat(result, v);
4014 Py_DECREF(result);
4015 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004016 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004017 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004018#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004019
Guido van Rossume5372401993-03-16 12:15:04 +00004020 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004021 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004022 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004023 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004024 }
Guido van Rossume5372401993-03-16 12:15:04 +00004025 return NULL;
4026}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004027
Guido van Rossum2a61e741997-01-18 07:55:05 +00004028void
Fred Drakeba096332000-07-09 07:04:36 +00004029PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004030{
4031 register PyStringObject *s = (PyStringObject *)(*p);
4032 PyObject *t;
4033 if (s == NULL || !PyString_Check(s))
4034 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004035 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004036 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004037 if (interned == NULL) {
4038 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004039 if (interned == NULL) {
4040 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004041 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004042 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004043 }
4044 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4045 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004046 Py_DECREF(*p);
4047 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004048 return;
4049 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004050 /* Ensure that only true string objects appear in the intern dict */
4051 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004052 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4053 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004054 if (t == NULL) {
4055 PyErr_Clear();
4056 return;
Tim Peters111f6092001-09-12 07:54:51 +00004057 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004058 } else {
4059 t = (PyObject*) s;
4060 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004061 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004062
4063 if (PyDict_SetItem(interned, t, t) == 0) {
4064 /* The two references in interned are not counted by
4065 refcnt. The string deallocator will take care of this */
4066 ((PyObject *)t)->ob_refcnt-=2;
4067 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4068 Py_DECREF(*p);
4069 *p = t;
4070 return;
4071 }
4072 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004073 PyErr_Clear();
4074}
4075
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004076void
4077PyString_InternImmortal(PyObject **p)
4078{
4079 PyString_InternInPlace(p);
4080 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4081 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4082 Py_INCREF(*p);
4083 }
4084}
4085
Guido van Rossum2a61e741997-01-18 07:55:05 +00004086
4087PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004088PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004089{
4090 PyObject *s = PyString_FromString(cp);
4091 if (s == NULL)
4092 return NULL;
4093 PyString_InternInPlace(&s);
4094 return s;
4095}
4096
Guido van Rossum8cf04761997-08-02 02:57:45 +00004097void
Fred Drakeba096332000-07-09 07:04:36 +00004098PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004099{
4100 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004101 for (i = 0; i < UCHAR_MAX + 1; i++) {
4102 Py_XDECREF(characters[i]);
4103 characters[i] = NULL;
4104 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004105 Py_XDECREF(nullstring);
4106 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004107}
Barry Warsawa903ad982001-02-23 16:40:48 +00004108
Barry Warsawa903ad982001-02-23 16:40:48 +00004109void _Py_ReleaseInternedStrings(void)
4110{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004111 PyObject *keys;
4112 PyStringObject *s;
4113 int i, n;
4114
4115 if (interned == NULL || !PyDict_Check(interned))
4116 return;
4117 keys = PyDict_Keys(interned);
4118 if (keys == NULL || !PyList_Check(keys)) {
4119 PyErr_Clear();
4120 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004121 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004122
4123 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4124 detector, interned strings are not forcibly deallocated; rather, we
4125 give them their stolen references back, and then clear and DECREF
4126 the interned dict. */
4127
4128 fprintf(stderr, "releasing interned strings\n");
4129 n = PyList_GET_SIZE(keys);
4130 for (i = 0; i < n; i++) {
4131 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4132 switch (s->ob_sstate) {
4133 case SSTATE_NOT_INTERNED:
4134 /* XXX Shouldn't happen */
4135 break;
4136 case SSTATE_INTERNED_IMMORTAL:
4137 s->ob_refcnt += 1;
4138 break;
4139 case SSTATE_INTERNED_MORTAL:
4140 s->ob_refcnt += 2;
4141 break;
4142 default:
4143 Py_FatalError("Inconsistent interned string state.");
4144 }
4145 s->ob_sstate = SSTATE_NOT_INTERNED;
4146 }
4147 Py_DECREF(keys);
4148 PyDict_Clear(interned);
4149 Py_DECREF(interned);
4150 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004151}